In [3]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from sklearn.impute import SimpleImputer

**Cleaning Health15 Dataset**

In [4]:
# Load the 2015 Health Insurance Characteristics table and drop every row that
# contains a null value.
health15 = pd.read_csv("HIC/Health Insurance Characteristics 2015.csv").dropna()

# Strip surrounding whitespace from the category labels so they match the
# column names used further down.
health15['Label (Grouping)'] = health15['Label (Grouping)'].str.strip()

# Transposing so the geographies become the rows and the categories become the columns
health15 = health15.set_index('Label (Grouping)').T
health15.columns.name = None

# Each original column header is parsed as "<County>,<State>!!<Insurance category>...".
# Plain str.split is used (not the .str accessor) so a malformed header raises
# IndexError instead of silently producing NaN.
geo_parts = [header.split(',') for header in health15.index]
state_parts = [parts[1].split('!!') for parts in geo_parts]
health15['County'] = [parts[0] for parts in geo_parts]
health15['State'] = [parts[0] for parts in state_parts]
health15['PopCategory'] = [parts[1] for parts in state_parts]

# Replace the header-based index with a positional 0..n-1 index named ''.
health15.index = pd.RangeIndex(len(health15), name = '')

# Cells that are exactly 'N' become 0 so they are picked up by the imputation below.
# NOTE(review): 'N' is presumably the source's "not available" flag - confirm.
health15 = health15.replace('N', 0)

dropColumns = [
    '18 to 64 years', '65 years and older',
    'Two or more races', 'White alone, not Hispanic or Latino',
    'In family households', 'In married couple families', 'In other families',
    'Male householder, no wife present', 'Female householder, no husband present',
    'In non-family households and other living arrangements',
    'With a disability', 'No disability',
    'Civilian noninstitutionalized population 25 years and over',
    'Civilian noninstitutionalized population 18 years and over',
    'In labor force', 'Employed', 'Unemployed', 'Not in labor force',
    'Civilian noninstitutionalized population 18 to 64 years',
    'Worked full-time, year round in the past 12 months',
    'Worked less than full-time, year round in the past 12 months',
    'Did not work', 'Total household population',
    'Civilian noninstitutionalized population for whom poverty status is determined',
    'Below 138 percent of the poverty threshold', '138 to 199 percent of the poverty threshold',
    '200 to 399 percent of the poverty threshold', 'At or above 400 percent of the poverty threshold',
]
health15 = health15.drop(columns = dropColumns)
health15 = health15.rename(columns = {'Civilian noninstitutionalized population': 'Total Population',
                                      'PopCategory': 'Insurance Category'})

sortColumns = [
    'State', 'County', 'Insurance Category', 'Total Population',
    'Under 6 years', '6 to 17 years', 'Under 18 years', '18 to 24 years', '19 to 25 years',
    '25 to 34 years', '35 to 44 years', '45 to 54 years', '55 to 64 years',
    '65 to 74 years', '75 years and older', 'Male', 'Female', 'White alone',
    'Black or African American alone', 'American Indian and Alaska Native alone', 'Asian alone',
    'Native Hawaiian and Other Pacific Islander alone', 'Some other race alone',
    'Hispanic or Latino (of any race)', 'Native born', 'Foreign born', 'Naturalized',
    'Not a citizen', 'Less than high school graduate',
    'High school graduate (includes equivalency)',
    "Some college or associate's degree", "Bachelor's degree or higher",
    'Under $25,000', '$25,000 to $49,999', '$50,000 to $74,999', '$75,000 to $99,999',
    '$100,000 and over',
]
# .copy() so the column assignments below write to an independent frame, not a slice.
health15 = health15[sortColumns].copy()

# Everything after State / County / Insurance Category is a count column.
columns = list(health15.columns)[3:]
for col in columns:
    # astype(str) first: .str.replace yields NaN for non-string cells (the 0 written
    # for 'N', or values read_csv already parsed as numbers), which would silently
    # discard real counts and have them mean-imputed instead.
    health15[col] = pd.to_numeric(health15[col].astype(str).str.replace(',', '', regex = False))

# Imputing the 0 values with the column mean: zeros are declared "missing" to the
# imputer, and the imputed means are truncated back to integers.
health15 = health15.fillna(0)
imp = SimpleImputer(strategy = 'mean', missing_values = 0)

health15[columns] = imp.fit_transform(health15[columns])
health15[columns] = health15[columns].astype(int)

# Normalise state names: no leading whitespace, underscores for spaces, 'DC' for the District.
health15['State'] = health15['State'].str.lstrip().str.replace(' ', '_', regex = False)
health15['State'] = health15['State'].replace('District_of_Columbia', 'DC')

# Derive the '6 to 18 years' and '26 to 34 years' bands from the bands available in this file.
# NOTE(review): the first expression reduces algebraically to 'Under 18 years', so it also
# includes the 'Under 6 years' population, and the second builds on it - confirm both formulas.
health15['6 to 18 years'] = (health15['Under 18 years'] - health15['6 to 17 years']) + health15['6 to 17 years']
health15 = health15.drop(columns = ['6 to 17 years', 'Under 18 years'])

health15['26 to 34 years'] = health15['25 to 34 years'] - (health15['19 to 25 years'] - (health15['Under 6 years'] + health15['6 to 18 years'] + health15['18 to 24 years']))
health15 = health15.drop(columns = ['25 to 34 years', '18 to 24 years'])

health15 = health15[['State', 'County', 'Insurance Category', 'Total Population',
       'Under 6 years', '6 to 18 years',  '19 to 25 years', '26 to 34 years', '35 to 44 years',
       '45 to 54 years', '55 to 64 years', '65 to 74 years',
       '75 years and older', 'Male', 'Female', 'White alone',
       'Black or African American alone',
       'American Indian and Alaska Native alone', 'Asian alone',
       'Native Hawaiian and Other Pacific Islander alone',
       'Some other race alone', 'Hispanic or Latino (of any race)',
       'Native born', 'Foreign born', 'Naturalized', 'Not a citizen',
       'Less than high school graduate',
       'High school graduate (includes equivalency)',
       "Some college or associate's degree", "Bachelor's degree or higher",
       'Under $25,000', '$25,000 to $49,999', '$50,000 to $74,999',
       '$75,000 to $99,999', '$100,000 and over']]

# Positional rename to short identifier-style names (same order as the selection above).
health15.columns = [
    'State', 'County', 'Insurance_Category',
    'Total_Population', 'Under_6Y', '_6_to_18Y', '_19_to_25Y', '_26_to_34Y', '_35_to_44Y',
    '_45_to_54Y', '_55_to_64Y', '_65_to_74Y', '_75_and_Older', 'Male',
    'Female', 'White', 'African_American', 'American_Indian', 'Asian', 'Pacific_Islander',
    'Some_Other_Race', 'Hispanic',
    'Native_Born', 'Foreign_Born', 'Naturalized', 'Not_A_Citizen',
    'Less_Than_High_School', 'High_School_or_Equivalent',
    "Some_College", "Bachelors_or_Higher", 'Under_25000S', '_25000_to_49999S',
    '_50000_to_74999S', '_75000_to_99999S', 'Over_100000S',
]

# Keep only the per-category rows; the 'Total' rows are removed.
health15 = health15[health15['Insurance_Category'] != 'Total'].copy()

**Cleaning Health16 Dataset**

In [5]:
# Load the 2016 Health Insurance Characteristics table and drop every row that
# contains a null value.
health16 = pd.read_csv("HIC/Health Insurance Characteristics 2016.csv").dropna()

# Strip surrounding whitespace from the category labels so they match the
# column names used further down.
health16['Label (Grouping)'] = health16['Label (Grouping)'].str.strip()

# Transposing so the geographies become the rows and the categories become the columns
health16 = health16.set_index('Label (Grouping)').T
health16.columns.name = None

# Each original column header is parsed as "<County>,<State>!!<Insurance category>...".
# Plain str.split is used (not the .str accessor) so a malformed header raises
# IndexError instead of silently producing NaN.
geo_parts = [header.split(',') for header in health16.index]
state_parts = [parts[1].split('!!') for parts in geo_parts]
health16['County'] = [parts[0] for parts in geo_parts]
health16['State'] = [parts[0] for parts in state_parts]
health16['PopCategory'] = [parts[1] for parts in state_parts]

# Replace the header-based index with a positional 0..n-1 index named ''.
health16.index = pd.RangeIndex(len(health16), name = '')

# Cells that are exactly 'N' become 0 so they are picked up by the imputation below.
# NOTE(review): 'N' is presumably the source's "not available" flag - confirm.
health16 = health16.replace('N', 0)

dropColumns = [
    '18 to 64 years', '65 years and older',
    'Two or more races', 'White alone, not Hispanic or Latino',
    'In family households', 'In married couple families', 'In other families',
    'Male householder, no wife present', 'Female householder, no husband present',
    'In non-family households and other living arrangements',
    'With a disability', 'No disability',
    'Civilian noninstitutionalized population 25 years and over',
    'Civilian noninstitutionalized population 18 years and over',
    'In labor force', 'Employed', 'Unemployed', 'Not in labor force',
    'Civilian noninstitutionalized population 18 to 64 years',
    'Worked full-time, year round in the past 12 months',
    'Worked less than full-time, year round in the past 12 months',
    'Did not work', 'Total household population',
    'Civilian noninstitutionalized population for whom poverty status is determined',
    'Below 138 percent of the poverty threshold', '138 to 199 percent of the poverty threshold',
    '200 to 399 percent of the poverty threshold', 'At or above 400 percent of the poverty threshold',
]
health16 = health16.drop(columns = dropColumns)
health16 = health16.rename(columns = {'Civilian noninstitutionalized population': 'Total Population',
                                      'PopCategory': 'Insurance Category'})

sortColumns = [
    'State', 'County', 'Insurance Category', 'Total Population',
    'Under 6 years', '6 to 17 years', 'Under 18 years', '18 to 24 years', '19 to 25 years',
    '25 to 34 years', '35 to 44 years', '45 to 54 years', '55 to 64 years',
    '65 to 74 years', '75 years and older', 'Male', 'Female', 'White alone',
    'Black or African American alone', 'American Indian and Alaska Native alone', 'Asian alone',
    'Native Hawaiian and Other Pacific Islander alone', 'Some other race alone',
    'Hispanic or Latino (of any race)', 'Native born', 'Foreign born', 'Naturalized',
    'Not a citizen', 'Less than high school graduate',
    'High school graduate (includes equivalency)',
    "Some college or associate's degree", "Bachelor's degree or higher",
    'Under $25,000', '$25,000 to $49,999', '$50,000 to $74,999', '$75,000 to $99,999',
    '$100,000 and over',
]
# .copy() so the column assignments below write to an independent frame, not a slice.
health16 = health16[sortColumns].copy()

# Everything after State / County / Insurance Category is a count column.
columns = list(health16.columns)[3:]
for col in columns:
    # astype(str) first: .str.replace yields NaN for non-string cells (the 0 written
    # for 'N', or values read_csv already parsed as numbers), which would silently
    # discard real counts and have them mean-imputed instead.
    health16[col] = pd.to_numeric(health16[col].astype(str).str.replace(',', '', regex = False))

# Imputing the 0 values with the column mean: zeros are declared "missing" to the
# imputer, and the imputed means are truncated back to integers.
health16 = health16.fillna(0)
imp = SimpleImputer(strategy = 'mean', missing_values = 0)

health16[columns] = imp.fit_transform(health16[columns])
health16[columns] = health16[columns].astype(int)

# Normalise state names: no leading whitespace, underscores for spaces, 'DC' for the District.
health16['State'] = health16['State'].str.lstrip().str.replace(' ', '_', regex = False)
health16['State'] = health16['State'].replace('District_of_Columbia', 'DC')

# Derive the '6 to 18 years' and '26 to 34 years' bands from the bands available in this file.
# NOTE(review): the first expression reduces algebraically to 'Under 18 years', so it also
# includes the 'Under 6 years' population, and the second builds on it - confirm both formulas.
health16['6 to 18 years'] = (health16['Under 18 years'] - health16['6 to 17 years']) + health16['6 to 17 years']
health16 = health16.drop(columns = ['6 to 17 years', 'Under 18 years'])

health16['26 to 34 years'] = health16['25 to 34 years'] - (health16['19 to 25 years'] - (health16['Under 6 years'] + health16['6 to 18 years'] + health16['18 to 24 years']))
health16 = health16.drop(columns = ['25 to 34 years', '18 to 24 years'])

health16 = health16[['State', 'County', 'Insurance Category', 'Total Population',
       'Under 6 years', '6 to 18 years',  '19 to 25 years', '26 to 34 years', '35 to 44 years',
       '45 to 54 years', '55 to 64 years', '65 to 74 years',
       '75 years and older', 'Male', 'Female', 'White alone',
       'Black or African American alone',
       'American Indian and Alaska Native alone', 'Asian alone',
       'Native Hawaiian and Other Pacific Islander alone',
       'Some other race alone', 'Hispanic or Latino (of any race)',
       'Native born', 'Foreign born', 'Naturalized', 'Not a citizen',
       'Less than high school graduate',
       'High school graduate (includes equivalency)',
       "Some college or associate's degree", "Bachelor's degree or higher",
       'Under $25,000', '$25,000 to $49,999', '$50,000 to $74,999',
       '$75,000 to $99,999', '$100,000 and over']]

# Positional rename to short identifier-style names (same order as the selection above).
health16.columns = [
    'State', 'County', 'Insurance_Category',
    'Total_Population', 'Under_6Y', '_6_to_18Y', '_19_to_25Y', '_26_to_34Y', '_35_to_44Y',
    '_45_to_54Y', '_55_to_64Y', '_65_to_74Y', '_75_and_Older', 'Male',
    'Female', 'White', 'African_American', 'American_Indian', 'Asian', 'Pacific_Islander',
    'Some_Other_Race', 'Hispanic',
    'Native_Born', 'Foreign_Born', 'Naturalized', 'Not_A_Citizen',
    'Less_Than_High_School', 'High_School_or_Equivalent',
    "Some_College", "Bachelors_or_Higher", 'Under_25000S', '_25000_to_49999S',
    '_50000_to_74999S', '_75000_to_99999S', 'Over_100000S',
]

# Keep only the per-category rows; the 'Total' rows are removed.
health16 = health16[health16['Insurance_Category'] != 'Total'].copy()

**Cleaning Health17 Dataset**

In [6]:
# Load the 2017 Health Insurance Characteristics table and drop every row that
# contains a null value.
health17 = pd.read_csv("HIC/Health Insurance Characteristics 2017.csv").dropna()

# Strip surrounding whitespace from the category labels so they match the
# column names used further down.
health17['Label (Grouping)'] = health17['Label (Grouping)'].str.strip()

# Transposing so the geographies become the rows and the categories become the columns
health17 = health17.set_index('Label (Grouping)').T
health17.columns.name = None

# Each original column header is parsed as "<County>,<State>!!<Insurance category>...".
# Plain str.split is used (not the .str accessor) so a malformed header raises
# IndexError instead of silently producing NaN.
geo_parts = [header.split(',') for header in health17.index]
state_parts = [parts[1].split('!!') for parts in geo_parts]
health17['County'] = [parts[0] for parts in geo_parts]
health17['State'] = [parts[0] for parts in state_parts]
health17['PopCategory'] = [parts[1] for parts in state_parts]

# Replace the header-based index with a positional 0..n-1 index named ''.
health17.index = pd.RangeIndex(len(health17), name = '')

# Cells that are exactly 'N' become 0 so they are picked up by the imputation below.
# NOTE(review): 'N' is presumably the source's "not available" flag - confirm.
health17 = health17.replace('N', 0)

dropColumns = [
    'Under 19 years', '19 to 64 years', '65 years and older', 'Two or more races',
    'White alone, not Hispanic or Latino', 'In family households',
    'In married couple families', 'In other families', 'Male householder, no wife present',
    'Female householder, no husband present',
    'In non-family households and other living arrangements', 'With a disability', 'No disability',
    'Civilian noninstitutionalized population 26 years and over',
    # A single entry is enough: drop() removes every column carrying this label.
    'Civilian noninstitutionalized population 19 to 64 years',
    'In labor force', 'Employed', 'Unemployed', 'Not in labor force',
    'Worked full-time, year round in the past 12 months',
    'Worked less than full-time, year round in the past 12 months', 'Did not work',
    'Total household population',
    'Civilian noninstitutionalized population for whom poverty status is determined',
    'Below 138 percent of the poverty threshold',
    '138 to 399 percent of the poverty threshold', 'At or above 400 percent of the poverty threshold',
    'Below 100 percent of the poverty threshold',
]

health17 = health17.drop(columns = dropColumns)
health17 = health17.rename(columns = {'Civilian noninstitutionalized population': 'Total Population',
                                      'PopCategory': 'Insurance Category'})

sortColumns = [
    'State', 'County', 'Insurance Category', 'Total Population',
    'Under 6 years', '6 to 18 years', '19 to 25 years', '26 to 34 years', '35 to 44 years',
    '45 to 54 years', '55 to 64 years',
    '65 to 74 years', '75 years and older', 'Male', 'Female', 'White alone',
    'Black or African American alone', 'American Indian and Alaska Native alone', 'Asian alone',
    'Native Hawaiian and Other Pacific Islander alone', 'Some other race alone',
    'Hispanic or Latino (of any race)', 'Native born', 'Foreign born', 'Naturalized',
    'Not a citizen', 'Less than high school graduate',
    'High school graduate (includes equivalency)',
    "Some college or associate's degree", "Bachelor's degree or higher",
    'Under $25,000', '$25,000 to $49,999', '$50,000 to $74,999', '$75,000 to $99,999',
    '$100,000 and over',
]

# .copy() so the column assignments below write to an independent frame, not a slice.
health17 = health17[sortColumns].copy()

# Everything after State / County / Insurance Category is a count column.
columns = list(health17.columns)[3:]
for col in columns:
    # astype(str) first: .str.replace yields NaN for non-string cells (the 0 written
    # for 'N', or values read_csv already parsed as numbers), which would silently
    # discard real counts and have them mean-imputed instead.
    health17[col] = pd.to_numeric(health17[col].astype(str).str.replace(',', '', regex = False))

# Imputing the 0 values with the column mean: zeros are declared "missing" to the
# imputer, and the imputed means are truncated back to integers.
health17 = health17.fillna(0)
imp = SimpleImputer(strategy = 'mean', missing_values = 0)

health17[columns] = imp.fit_transform(health17[columns])
health17[columns] = health17[columns].astype(int)

# Positional rename to short identifier-style names (same order as sortColumns).
health17.columns = [
    'State', 'County', 'Insurance_Category',
    'Total_Population', 'Under_6Y', '_6_to_18Y', '_19_to_25Y', '_26_to_34Y', '_35_to_44Y',
    '_45_to_54Y', '_55_to_64Y', '_65_to_74Y', '_75_and_Older', 'Male',
    'Female', 'White', 'African_American', 'American_Indian', 'Asian', 'Pacific_Islander',
    'Some_Other_Race', 'Hispanic',
    'Native_Born', 'Foreign_Born', 'Naturalized', 'Not_A_Citizen',
    'Less_Than_High_School', 'High_School_or_Equivalent',
    "Some_College", "Bachelors_or_Higher", 'Under_25000S', '_25000_to_49999S',
    '_50000_to_74999S', '_75000_to_99999S', 'Over_100000S',
]

# Normalise state names: no leading whitespace, underscores for spaces, 'DC' for the District.
health17['State'] = health17['State'].str.lstrip().str.replace(' ', '_', regex = False)
health17['State'] = health17['State'].replace('District_of_Columbia', 'DC')

# Keep only the per-category rows; the 'Total' rows are removed.
health17 = health17[health17['Insurance_Category'] != 'Total'].copy()

**Cleaning Health18 Dataset**

In [7]:
# Load the 2018 Health Insurance Characteristics table and drop every row that
# contains a null value.
health18 = pd.read_csv("HIC/Health Insurance Characteristics 2018.csv").dropna()

# Strip surrounding whitespace from the category labels so they match the
# column names used further down.
health18['Label (Grouping)'] = health18['Label (Grouping)'].str.strip()

# Transposing so the geographies become the rows and the categories become the columns
health18 = health18.set_index('Label (Grouping)').T
health18.columns.name = None

# Each original column header is parsed as "<County>,<State>!!<Insurance category>...".
# Plain str.split is used (not the .str accessor) so a malformed header raises
# IndexError instead of silently producing NaN.
geo_parts = [header.split(',') for header in health18.index]
state_parts = [parts[1].split('!!') for parts in geo_parts]
health18['County'] = [parts[0] for parts in geo_parts]
health18['State'] = [parts[0] for parts in state_parts]
health18['PopCategory'] = [parts[1] for parts in state_parts]

# Replace the header-based index with a positional 0..n-1 index named ''.
health18.index = pd.RangeIndex(len(health18), name = '')

# Cells that are exactly 'N' become 0 so they are picked up by the imputation below.
# NOTE(review): 'N' is presumably the source's "not available" flag - confirm.
health18 = health18.replace('N', 0)

dropColumns = [
    'Under 19 years', '19 to 64 years', '65 years and older', 'Two or more races',
    'White alone, not Hispanic or Latino', 'In family households',
    'In married couple families', 'In other families', 'Male householder, no wife present',
    'Female householder, no husband present',
    'In non-family households and other living arrangements', 'With a disability', 'No disability',
    'Civilian noninstitutionalized population 26 years and over',
    # A single entry is enough: drop() removes every column carrying this label.
    'Civilian noninstitutionalized population 19 to 64 years',
    'In labor force', 'Employed', 'Unemployed', 'Not in labor force',
    'Worked full-time, year round in the past 12 months',
    'Worked less than full-time, year round in the past 12 months', 'Did not work',
    'Total household population',
    'Civilian noninstitutionalized population for whom poverty status is determined',
    'Below 138 percent of the poverty threshold',
    '138 to 399 percent of the poverty threshold', 'At or above 400 percent of the poverty threshold',
    'Below 100 percent of the poverty threshold',
]

health18 = health18.drop(columns = dropColumns)
health18 = health18.rename(columns = {'Civilian noninstitutionalized population': 'Total Population',
                                      'PopCategory': 'Insurance Category'})

sortColumns = [
    'State', 'County', 'Insurance Category', 'Total Population',
    'Under 6 years', '6 to 18 years', '19 to 25 years', '26 to 34 years', '35 to 44 years',
    '45 to 54 years', '55 to 64 years',
    '65 to 74 years', '75 years and older', 'Male', 'Female', 'White alone',
    'Black or African American alone', 'American Indian and Alaska Native alone', 'Asian alone',
    'Native Hawaiian and Other Pacific Islander alone', 'Some other race alone',
    'Hispanic or Latino (of any race)', 'Native born', 'Foreign born', 'Naturalized',
    'Not a citizen', 'Less than high school graduate',
    'High school graduate (includes equivalency)',
    "Some college or associate's degree", "Bachelor's degree or higher",
    'Under $25,000', '$25,000 to $49,999', '$50,000 to $74,999', '$75,000 to $99,999',
    '$100,000 and over',
]

# .copy() so the column assignments below write to an independent frame, not a slice.
health18 = health18[sortColumns].copy()

# Everything after State / County / Insurance Category is a count column.
columns = list(health18.columns)[3:]
for col in columns:
    # astype(str) first: .str.replace yields NaN for non-string cells (the 0 written
    # for 'N', or values read_csv already parsed as numbers), which would silently
    # discard real counts and have them mean-imputed instead.
    health18[col] = pd.to_numeric(health18[col].astype(str).str.replace(',', '', regex = False))

# Imputing the 0 values with the column mean: zeros are declared "missing" to the
# imputer, and the imputed means are truncated back to integers.
health18 = health18.fillna(0)
imp = SimpleImputer(strategy = 'mean', missing_values = 0)

health18[columns] = imp.fit_transform(health18[columns])
health18[columns] = health18[columns].astype(int)

# Positional rename to short identifier-style names (same order as sortColumns).
health18.columns = [
    'State', 'County', 'Insurance_Category',
    'Total_Population', 'Under_6Y', '_6_to_18Y', '_19_to_25Y', '_26_to_34Y', '_35_to_44Y',
    '_45_to_54Y', '_55_to_64Y', '_65_to_74Y', '_75_and_Older', 'Male',
    'Female', 'White', 'African_American', 'American_Indian', 'Asian', 'Pacific_Islander',
    'Some_Other_Race', 'Hispanic',
    'Native_Born', 'Foreign_Born', 'Naturalized', 'Not_A_Citizen',
    'Less_Than_High_School', 'High_School_or_Equivalent',
    "Some_College", "Bachelors_or_Higher", 'Under_25000S', '_25000_to_49999S',
    '_50000_to_74999S', '_75000_to_99999S', 'Over_100000S',
]

# Normalise state names: no leading whitespace, underscores for spaces, 'DC' for the District.
health18['State'] = health18['State'].str.lstrip().str.replace(' ', '_', regex = False)
health18['State'] = health18['State'].replace('District_of_Columbia', 'DC')

# Keep only the per-category rows; the 'Total' rows are removed.
health18 = health18[health18['Insurance_Category'] != 'Total'].copy()

**Cleaning Health19 Dataset**

In [8]:
# Load the Health Insurance Characteristics table stored without a year suffix
# (assigned to health19) and drop every row that contains a null value.
health19 = pd.read_csv("HIC/Health Insurance Characteristics.csv").dropna()

# Strip surrounding whitespace from the category labels so they match the
# column names used further down.
health19['Label (Grouping)'] = health19['Label (Grouping)'].str.strip()

# Transposing so the geographies become the rows and the categories become the columns
health19 = health19.set_index('Label (Grouping)').T
health19.columns.name = None

# Each original column header is parsed as "<County>,<State>!!<Insurance category>...".
# Plain str.split is used (not the .str accessor) so a malformed header raises
# IndexError instead of silently producing NaN.
geo_parts = [header.split(',') for header in health19.index]
state_parts = [parts[1].split('!!') for parts in geo_parts]
health19['County'] = [parts[0] for parts in geo_parts]
health19['State'] = [parts[0] for parts in state_parts]
health19['PopCategory'] = [parts[1] for parts in state_parts]

# Replace the header-based index with a positional 0..n-1 index named ''.
health19.index = pd.RangeIndex(len(health19), name = '')

# Cells that are exactly 'N' become 0 so they are picked up by the imputation below.
# NOTE(review): 'N' is presumably the source's "not available" flag - confirm.
health19 = health19.replace('N', 0)

health19 = health19.drop(columns = [
    'Total household population', 'In non-family households and other living arrangements',
    'In family households', 'In married couple families', 'In other families',
    'Male reference person, no spouse present',
    'Female reference person, no spouse present', 'With a disability', 'No disability',
    'Civilian noninstitutionalized population 19 to 64 years',
    'In labor force', 'Employed', 'Unemployed', 'Not in labor force',
    'Worked full-time, year round in the past 12 months',
    'Worked less than full-time, year round in the past 12 months', 'Did not work',
    'Civilian noninstitutionalized population for whom poverty status is determined',
    'Civilian noninstitutionalized population 26 years and over', 'Under 19 years',
    '19 to 64 years', '65 years and older', 'Two or more races',
    'White alone, not Hispanic or Latino', 'Below 138 percent of the poverty threshold',
    '138 to 399 percent of the poverty threshold',
    'At or above 400 percent of the poverty threshold', 'Below 100 percent of the poverty threshold',
])

# Every remaining column except the trailing County / State / PopCategory is a count column.
columns = list(health19.columns)[:-3]
for col in columns:
    # astype(str) first: .str.replace yields NaN for non-string cells (the 0 written
    # for 'N', or values read_csv already parsed as numbers), which would silently
    # discard real counts and have them mean-imputed instead.
    health19[col] = pd.to_numeric(health19[col].astype(str).str.replace(',', '', regex = False))

# Imputing the 0 values with the column mean: zeros are declared "missing" to the
# imputer, and the imputed means are truncated back to integers.
health19 = health19.fillna(0)
imp = SimpleImputer(strategy = 'mean', missing_values = 0)

health19[columns] = imp.fit_transform(health19[columns])
health19[columns] = health19[columns].astype(int)

newColumns = [
    'State', 'County', 'PopCategory',
    'Civilian noninstitutionalized population', 'Under 6 years', '6 to 18 years',
    '19 to 25 years', '26 to 34 years', '35 to 44 years',
    '45 to 54 years', '55 to 64 years', '65 to 74 years', '75 years and older', 'Male',
    'Female', 'White alone', 'Black or African American alone',
    'American Indian and Alaska Native alone', 'Asian alone',
    'Native Hawaiian and Other Pacific Islander alone',
    'Some other race alone', 'Hispanic or Latino (of any race)',
    'Native born', 'Foreign born', 'Naturalized', 'Not a citizen',
    'Less than high school graduate', 'High school graduate (includes equivalency)',
    "Some college or associate's degree", "Bachelor's degree or higher",
    'Under $25,000', '$25,000 to $49,999',
    '$50,000 to $74,999', '$75,000 to $99,999', '$100,000 and over',
]

# .copy() so the State assignment below writes to an independent frame, not a slice.
health19 = health19[newColumns].copy()

# Positional rename to short identifier-style names (same order as newColumns).
health19.columns = [
    'State', 'County', 'Insurance_Category',
    'Total_Population', 'Under_6Y', '_6_to_18Y', '_19_to_25Y', '_26_to_34Y', '_35_to_44Y',
    '_45_to_54Y', '_55_to_64Y', '_65_to_74Y', '_75_and_Older', 'Male',
    'Female', 'White', 'African_American', 'American_Indian', 'Asian', 'Pacific_Islander',
    'Some_Other_Race', 'Hispanic',
    'Native_Born', 'Foreign_Born', 'Naturalized', 'Not_A_Citizen',
    'Less_Than_High_School', 'High_School_or_Equivalent',
    "Some_College", "Bachelors_or_Higher", 'Under_25000S', '_25000_to_49999S',
    '_50000_to_74999S', '_75000_to_99999S', 'Over_100000S',
]

# Normalise state names: no leading whitespace, underscores for spaces, 'DC' for the District.
health19['State'] = health19['State'].str.lstrip().str.replace(' ', '_', regex = False)
health19['State'] = health19['State'].replace('District_of_Columbia', 'DC')

# Keep only the per-category rows; the 'Total' rows are removed.
health19 = health19[health19['Insurance_Category'] != 'Total'].copy()

**Test Data**

In [9]:
# Load the 2020 demographics test table and drop every row that contains a null value.
testdemo = pd.read_csv("Demographics Test Data 2020.csv").dropna()

# Strip surrounding whitespace from the category labels so they match the
# column names used further down.
testdemo['Label (Grouping)'] = testdemo['Label (Grouping)'].str.strip()

# The national columns are removed; only the "<County>, <State>" columns are kept.
testdemo = testdemo.drop(columns = ['United States!!Insured!!Estimate', 'United States!!Uninsured!!Estimate'])

# Transposing so the geographies become the rows and the categories become the columns.
# Using the labels as the index before transposing names the columns directly, so no
# numeric placeholder column can overwrite a data column that happens to share its label.
testdemo = testdemo.set_index('Label (Grouping)').T
testdemo.columns.name = None

# Each header is "<County>,<State>!!<Insurance category>" once the trailing
# '!!Estimate' is removed. Plain str.split keeps a malformed header a loud IndexError.
headers = [header.removesuffix('!!Estimate') for header in testdemo.index]
geo_parts = [header.split(',') for header in headers]
state_parts = [parts[1].split('!!') for parts in geo_parts]
testdemo['County'] = [parts[0] for parts in geo_parts]
testdemo['State'] = [parts[0] for parts in state_parts]
testdemo['PopCategory'] = [parts[1] for parts in state_parts]

# Replace the header-based index with a positional 0..n-1 index named ''.
testdemo.index = pd.RangeIndex(len(testdemo), name = '')

testdemo = testdemo.drop(columns = [
    'Total household population', 'In non-family households and other living arrangements',
    'In family households', 'In married couple families', 'In other families',
    'Male reference person, no spouse present',
    'Female reference person, no spouse present', 'With a disability', 'No disability',
    'Civilian noninstitutionalized population 19 to 64 years',
    'In labor force', 'Employed', 'Unemployed', 'Not in labor force',
    'Worked full-time, year round in the past 12 months',
    'Worked less than full-time, year round in the past 12 months', 'Did not work',
    'Civilian noninstitutionalized population for whom poverty status is determined',
    'Civilian noninstitutionalized population 26 years and over', 'Under 19 years',
    '19 to 64 years', '65 years and older', 'Two or more races',
    'White alone, not Hispanic or Latino', 'Below 138 percent of the poverty threshold',
    '138 to 399 percent of the poverty threshold',
    'At or above 400 percent of the poverty threshold', 'Below 100 percent of the poverty threshold',
])

# Every remaining column except the trailing County / State / PopCategory is a count column.
columns = list(testdemo.columns)[:-3]

# Remove thousands separators once for the whole frame (this does not depend on the
# column being converted, so it does not belong inside the loop), then convert.
testdemo = testdemo.replace([','], '', regex = True)
for col in columns:
    testdemo[col] = pd.to_numeric(testdemo[col])

testdemo[columns] = testdemo[columns].astype(int)

newColumns = [
    'State', 'County', 'PopCategory',
    'Civilian noninstitutionalized population', 'Under 6 years', '6 to 18 years',
    '19 to 25 years', '26 to 34 years', '35 to 44 years',
    '45 to 54 years', '55 to 64 years', '65 to 74 years', '75 years and older', 'Male',
    'Female', 'White alone', 'Black or African American alone',
    'American Indian and Alaska Native alone', 'Asian alone',
    'Native Hawaiian and Other Pacific Islander alone',
    'Some other race alone', 'Hispanic or Latino (of any race)',
    'Native born', 'Foreign born', 'Naturalized', 'Not a citizen',
    'Less than high school graduate', 'High school graduate (includes equivalency)',
    "Some college or associate's degree", "Bachelor's degree or higher",
    'Under $25,000', '$25,000 to $49,999',
    '$50,000 to $74,999', '$75,000 to $99,999', '$100,000 and over',
]

# .copy() so the State assignment below writes to an independent frame, not a slice.
testdemo = testdemo[newColumns].copy()

# Positional rename to short identifier-style names (same order as newColumns).
# NOTE(review): 'Less_Than_High_school' and 'Over100000S' differ from the
# 'Less_Than_High_School' / 'Over_100000S' spelling used for the health15-health19
# frames; they are left as-is because later cells may rely on them - confirm and align.
testdemo.columns = [
    'State', 'County', 'Insurance_Category',
    'Total_Population', 'Under_6Y', '_6_to_18Y', '_19_to_25Y', '_26_to_34Y', '_35_to_44Y',
    '_45_to_54Y', '_55_to_64Y', '_65_to_74Y', '_75_and_Older', 'Male',
    'Female', 'White', 'African_American', 'American_Indian', 'Asian', 'Pacific_Islander',
    'Some_Other_Race', 'Hispanic',
    'Native_Born', 'Foreign_Born', 'Naturalized', 'Not_A_Citizen',
    'Less_Than_High_school', 'High_School_or_Equivalent',
    "Some_College", "Bachelors_or_Higher", 'Under_25000S', '_25000_to_49999S',
    '_50000_to_74999S', '_75000_to_99999S', 'Over100000S',
]

# Normalise state names: no leading whitespace, underscores for spaces, 'DC' for the District.
testdemo['State'] = testdemo['State'].str.lstrip().str.replace(' ', '_', regex = False)
testdemo['State'] = testdemo['State'].replace('District_of_Columbia', 'DC')

**ANOVA tests**

In [10]:
# ANOVA Test for 2015

# Letter-only column names, so each one can be dropped straight into the model
# formula built in the loop below.
anova_names = {
    'Insurance_Category': 'InsuranceCategory',
    'Total_Population': 'TotalPopulation',
    'Under_6Y': 'Undersixyears',
    '_6_to_18Y': 'sixtoeighteenyears',
    '_19_to_25Y': 'nineteentotwentyfiveyears',
    '_26_to_34Y': 'twentysixtothirtyfouryears',
    '_35_to_44Y': 'thirtyfivetofourtyfouryears',
    '_45_to_54Y': 'fourtyfivetofiftyfouryears',
    '_55_to_64Y': 'fiftyfivetosixtyfouryears',
    '_65_to_74Y': 'sixtyfivetoseventyfouryears',
    '_75_and_Older': 'seventyfiveandmoreyears',
    'African_American': 'AfricanAmerican',
    'American_Indian': 'AmericanIndian',
    'Pacific_Islander': 'PacificIslander',
    'Some_Other_Race': 'Someotherrace',
    'Native_Born': 'Nativeborn',
    'Foreign_Born': 'Foreignborn',
    'Not_A_Citizen': 'Notacitizen',
    'Less_Than_High_School': 'Lessthanhighschoolgraduate',
    'High_School_or_Equivalent': 'Highschoolgraduate',
    "Some_College": "Somecollege",
    "Bachelors_or_Higher": "Bachelors",
    'Under_25000S': 'Undertwentyfive',
    '_25000_to_49999S': 'twentyfivetofourtyninesalary',
    '_50000_to_74999S': 'fiftytoseventyfoursalary',
    '_75000_to_99999S': 'seventyfivetoninetyninesalary',
    'Over_100000S': 'onehundredandoversalary',
}
healthAnova = health15.rename(columns = anova_names).copy()

# Response variables: every column from position 4 onward, excluding the last one.
# NOTE(review): the [4:-1] slice leaves the final column out of the tests - confirm
# that skipping it is intended.
anovaColumns = list(healthAnova.columns)[4:-1]

# One two-way ANOVA (type 2) per response: State, InsuranceCategory and their
# interaction, both sum-coded. Only the p-value column of each table is printed.
for response in anovaColumns:
    data15 = healthAnova[[response, 'InsuranceCategory', 'State']]
    formula = f'{response} ~ C(State, Sum)*C(InsuranceCategory, Sum)'
    health15_lm = ols(formula, data = data15).fit()

    table = sm.stats.anova_lm(health15_lm, typ = 2)
    print(response, table['PR(>F)'])

Undersixyears C(State, Sum)                              1.687828e-04
C(InsuranceCategory, Sum)                  8.299529e-45
C(State, Sum):C(InsuranceCategory, Sum)    1.471362e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
sixtoeighteenyears C(State, Sum)                              3.974959e-06
C(InsuranceCategory, Sum)                  2.431480e-52
C(State, Sum):C(InsuranceCategory, Sum)    4.919738e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
nineteentotwentyfiveyears C(State, Sum)                              4.459071e-09
C(InsuranceCategory, Sum)                  1.000749e-37
C(State, Sum):C(InsuranceCategory, Sum)    2.042167e-02
Residual                                            NaN
Name: PR(>F), dtype: float64
twentysixtothirtyfouryears C(State, Sum)                              1.591891e-06
C(InsuranceCategory, Sum)                  4.219443e-45
C(State, Sum):C(InsuranceCategory, Sum)    

In [11]:
# ANOVA Test for 2016
#
# For each selected column, fit an OLS model with State, InsuranceCategory and
# their interaction (Sum contrasts) and print the p-values of the ANOVA table.

# Give the columns underscore-free names before they are interpolated into the
# model formula.  DataFrame.rename silently skips keys that are not present, so
# both spellings of the high-school and top-income columns are listed: the
# original map only had 'Less_Than_High_School' / 'Over_100000S', while testdemo
# names these columns 'Less_Than_High_school' / 'Over100000S'.
# NOTE(review): health16 presumably uses the same spellings as testdemo — confirm.
healthAnova = health16.rename(columns = {
    'Insurance_Category': 'InsuranceCategory',
    'Total_Population': 'TotalPopulation',
    'Under_6Y': 'Undersixyears',
    '_6_to_18Y': 'sixtoeighteenyears',
    '_19_to_25Y': 'nineteentotwentyfiveyears',
    '_26_to_34Y': 'twentysixtothirtyfouryears',
    '_35_to_44Y': 'thirtyfivetofourtyfouryears',
    '_45_to_54Y': 'fourtyfivetofiftyfouryears',
    '_55_to_64Y': 'fiftyfivetosixtyfouryears',
    '_65_to_74Y': 'sixtyfivetoseventyfouryears',
    '_75_and_Older': 'seventyfiveandmoreyears',
    'African_American': 'AfricanAmerican',
    'American_Indian': 'AmericanIndian',
    'Pacific_Islander': 'PacificIslander',
    'Some_Other_Race': 'Someotherrace',
    'Native_Born': 'Nativeborn',
    'Foreign_Born': 'Foreignborn',
    'Not_A_Citizen': 'Notacitizen',
    'Less_Than_High_School': 'Lessthanhighschoolgraduate',
    'Less_Than_High_school': 'Lessthanhighschoolgraduate',
    'High_School_or_Equivalent': 'Highschoolgraduate',
    'Some_College': 'Somecollege',
    'Bachelors_or_Higher': 'Bachelors',
    'Under_25000S': 'Undertwentyfive',
    '_25000_to_49999S': 'twentyfivetofourtyninesalary',
    '_50000_to_74999S': 'fiftytoseventyfoursalary',
    '_75000_to_99999S': 'seventyfivetoninetyninesalary',
    'Over_100000S': 'onehundredandoversalary',
    'Over100000S': 'onehundredandoversalary',
})

# Response variables: everything except the first four columns and the last one.
# NOTE(review): presumably the first four are State, County, InsuranceCategory and
# TotalPopulation, as in testdemo — confirm which column the trailing -1 excludes.
anovaColumns = list(healthAnova.columns)[4:-1]

for i in anovaColumns:
    # Only the response column and the two factors are handed to the model
    data16 = healthAnova[[i, 'InsuranceCategory', 'State']]
    health16_lm = ols(f'{i} ~ C(State, Sum)*C(InsuranceCategory, Sum)', data = data16).fit()

    # typ = 2 requests the Type II ANOVA table; only its p-value column is printed
    table = sm.stats.anova_lm(health16_lm, typ = 2)
    print(i, table['PR(>F)'])

Undersixyears C(State, Sum)                              5.166142e-05
C(InsuranceCategory, Sum)                  2.748597e-45
C(State, Sum):C(InsuranceCategory, Sum)    2.499196e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
sixtoeighteenyears C(State, Sum)                              4.381454e-06
C(InsuranceCategory, Sum)                  1.206117e-52
C(State, Sum):C(InsuranceCategory, Sum)    3.813244e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
nineteentotwentyfiveyears C(State, Sum)                              1.038795e-08
C(InsuranceCategory, Sum)                  2.716909e-40
C(State, Sum):C(InsuranceCategory, Sum)    5.106389e-03
Residual                                            NaN
Name: PR(>F), dtype: float64
twentysixtothirtyfouryears C(State, Sum)                              2.303551e-06
C(InsuranceCategory, Sum)                  8.215435e-46
C(State, Sum):C(InsuranceCategory, Sum)    

In [12]:
# ANOVA Test for 2017
#
# For each selected column, fit an OLS model with State, InsuranceCategory and
# their interaction (Sum contrasts) and print the p-values of the ANOVA table.

# Give the columns underscore-free names before they are interpolated into the
# model formula.  DataFrame.rename silently skips keys that are not present, so
# both spellings of the high-school and top-income columns are listed: the
# original map only had 'Less_Than_High_School' / 'Over_100000S', while testdemo
# names these columns 'Less_Than_High_school' / 'Over100000S'.
# NOTE(review): health17 presumably uses the same spellings as testdemo — confirm.
healthAnova = health17.rename(columns = {
    'Insurance_Category': 'InsuranceCategory',
    'Total_Population': 'TotalPopulation',
    'Under_6Y': 'Undersixyears',
    '_6_to_18Y': 'sixtoeighteenyears',
    '_19_to_25Y': 'nineteentotwentyfiveyears',
    '_26_to_34Y': 'twentysixtothirtyfouryears',
    '_35_to_44Y': 'thirtyfivetofourtyfouryears',
    '_45_to_54Y': 'fourtyfivetofiftyfouryears',
    '_55_to_64Y': 'fiftyfivetosixtyfouryears',
    '_65_to_74Y': 'sixtyfivetoseventyfouryears',
    '_75_and_Older': 'seventyfiveandmoreyears',
    'African_American': 'AfricanAmerican',
    'American_Indian': 'AmericanIndian',
    'Pacific_Islander': 'PacificIslander',
    'Some_Other_Race': 'Someotherrace',
    'Native_Born': 'Nativeborn',
    'Foreign_Born': 'Foreignborn',
    'Not_A_Citizen': 'Notacitizen',
    'Less_Than_High_School': 'Lessthanhighschoolgraduate',
    'Less_Than_High_school': 'Lessthanhighschoolgraduate',
    'High_School_or_Equivalent': 'Highschoolgraduate',
    'Some_College': 'Somecollege',
    'Bachelors_or_Higher': 'Bachelors',
    'Under_25000S': 'Undertwentyfive',
    '_25000_to_49999S': 'twentyfivetofourtyninesalary',
    '_50000_to_74999S': 'fiftytoseventyfoursalary',
    '_75000_to_99999S': 'seventyfivetoninetyninesalary',
    'Over_100000S': 'onehundredandoversalary',
    'Over100000S': 'onehundredandoversalary',
})

# Response variables: everything except the first four columns and the last one.
# NOTE(review): presumably the first four are State, County, InsuranceCategory and
# TotalPopulation, as in testdemo — confirm which column the trailing -1 excludes.
anovaColumns = list(healthAnova.columns)[4:-1]

for i in anovaColumns:
    # Only the response column and the two factors are handed to the model
    data17 = healthAnova[[i, 'InsuranceCategory', 'State']]
    health17_lm = ols(f'{i} ~ C(State, Sum)*C(InsuranceCategory, Sum)', data = data17).fit()

    # typ = 2 requests the Type II ANOVA table; only its p-value column is printed
    table = sm.stats.anova_lm(health17_lm, typ = 2)
    print(i, table['PR(>F)'])

Undersixyears C(State, Sum)                              5.187147e-05
C(InsuranceCategory, Sum)                  3.984940e-45
C(State, Sum):C(InsuranceCategory, Sum)    1.888073e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
sixtoeighteenyears C(State, Sum)                              7.495681e-07
C(InsuranceCategory, Sum)                  7.035174e-54
C(State, Sum):C(InsuranceCategory, Sum)    2.904931e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
nineteentotwentyfiveyears C(State, Sum)                              1.417758e-08
C(InsuranceCategory, Sum)                  1.446899e-40
C(State, Sum):C(InsuranceCategory, Sum)    4.209133e-03
Residual                                            NaN
Name: PR(>F), dtype: float64
twentysixtothirtyfouryears C(State, Sum)                              1.565686e-07
C(InsuranceCategory, Sum)                  1.483566e-31
C(State, Sum):C(InsuranceCategory, Sum)    

In [13]:
# ANOVA Test for 2018
#
# For each selected column, fit an OLS model with State, InsuranceCategory and
# their interaction (Sum contrasts) and print the p-values of the ANOVA table.

# Give the columns underscore-free names before they are interpolated into the
# model formula.  DataFrame.rename silently skips keys that are not present, so
# both spellings of the high-school and top-income columns are listed: the
# original map only had 'Less_Than_High_School' / 'Over_100000S', while testdemo
# names these columns 'Less_Than_High_school' / 'Over100000S'.
# NOTE(review): health18 presumably uses the same spellings as testdemo — confirm.
healthAnova = health18.rename(columns = {
    'Insurance_Category': 'InsuranceCategory',
    'Total_Population': 'TotalPopulation',
    'Under_6Y': 'Undersixyears',
    '_6_to_18Y': 'sixtoeighteenyears',
    '_19_to_25Y': 'nineteentotwentyfiveyears',
    '_26_to_34Y': 'twentysixtothirtyfouryears',
    '_35_to_44Y': 'thirtyfivetofourtyfouryears',
    '_45_to_54Y': 'fourtyfivetofiftyfouryears',
    '_55_to_64Y': 'fiftyfivetosixtyfouryears',
    '_65_to_74Y': 'sixtyfivetoseventyfouryears',
    '_75_and_Older': 'seventyfiveandmoreyears',
    'African_American': 'AfricanAmerican',
    'American_Indian': 'AmericanIndian',
    'Pacific_Islander': 'PacificIslander',
    'Some_Other_Race': 'Someotherrace',
    'Native_Born': 'Nativeborn',
    'Foreign_Born': 'Foreignborn',
    'Not_A_Citizen': 'Notacitizen',
    'Less_Than_High_School': 'Lessthanhighschoolgraduate',
    'Less_Than_High_school': 'Lessthanhighschoolgraduate',
    'High_School_or_Equivalent': 'Highschoolgraduate',
    'Some_College': 'Somecollege',
    'Bachelors_or_Higher': 'Bachelors',
    'Under_25000S': 'Undertwentyfive',
    '_25000_to_49999S': 'twentyfivetofourtyninesalary',
    '_50000_to_74999S': 'fiftytoseventyfoursalary',
    '_75000_to_99999S': 'seventyfivetoninetyninesalary',
    'Over_100000S': 'onehundredandoversalary',
    'Over100000S': 'onehundredandoversalary',
})

# Response variables: everything except the first four columns and the last one.
# NOTE(review): presumably the first four are State, County, InsuranceCategory and
# TotalPopulation, as in testdemo — confirm which column the trailing -1 excludes.
anovaColumns = list(healthAnova.columns)[4:-1]

for i in anovaColumns:
    # Only the response column and the two factors are handed to the model
    data18 = healthAnova[[i, 'InsuranceCategory', 'State']]
    health18_lm = ols(f'{i} ~ C(State, Sum)*C(InsuranceCategory, Sum)', data = data18).fit()

    # typ = 2 requests the Type II ANOVA table; only its p-value column is printed
    table = sm.stats.anova_lm(health18_lm, typ = 2)
    print(i, table['PR(>F)'])

Undersixyears C(State, Sum)                              2.394707e-05
C(InsuranceCategory, Sum)                  5.531727e-46
C(State, Sum):C(InsuranceCategory, Sum)    2.058612e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
sixtoeighteenyears C(State, Sum)                              5.642848e-07
C(InsuranceCategory, Sum)                  2.427368e-54
C(State, Sum):C(InsuranceCategory, Sum)    1.979750e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
nineteentotwentyfiveyears C(State, Sum)                              1.348574e-08
C(InsuranceCategory, Sum)                  5.973499e-41
C(State, Sum):C(InsuranceCategory, Sum)    5.393031e-03
Residual                                            NaN
Name: PR(>F), dtype: float64
twentysixtothirtyfouryears C(State, Sum)                              9.946257e-08
C(InsuranceCategory, Sum)                  8.327702e-32
C(State, Sum):C(InsuranceCategory, Sum)    

In [14]:
# ANOVA Test for 2019
#
# For each selected column, fit an OLS model with State, InsuranceCategory and
# their interaction (Sum contrasts) and print the p-values of the ANOVA table.

# Give the columns underscore-free names before they are interpolated into the
# model formula.  DataFrame.rename silently skips keys that are not present, so
# both spellings of the high-school and top-income columns are listed: the
# original map only had 'Less_Than_High_School' / 'Over_100000S', while testdemo
# names these columns 'Less_Than_High_school' / 'Over100000S'.
# NOTE(review): health19 presumably uses the same spellings as testdemo — confirm.
healthAnova = health19.rename(columns = {
    'Insurance_Category': 'InsuranceCategory',
    'Total_Population': 'TotalPopulation',
    'Under_6Y': 'Undersixyears',
    '_6_to_18Y': 'sixtoeighteenyears',
    '_19_to_25Y': 'nineteentotwentyfiveyears',
    '_26_to_34Y': 'twentysixtothirtyfouryears',
    '_35_to_44Y': 'thirtyfivetofourtyfouryears',
    '_45_to_54Y': 'fourtyfivetofiftyfouryears',
    '_55_to_64Y': 'fiftyfivetosixtyfouryears',
    '_65_to_74Y': 'sixtyfivetoseventyfouryears',
    '_75_and_Older': 'seventyfiveandmoreyears',
    'African_American': 'AfricanAmerican',
    'American_Indian': 'AmericanIndian',
    'Pacific_Islander': 'PacificIslander',
    'Some_Other_Race': 'Someotherrace',
    'Native_Born': 'Nativeborn',
    'Foreign_Born': 'Foreignborn',
    'Not_A_Citizen': 'Notacitizen',
    'Less_Than_High_School': 'Lessthanhighschoolgraduate',
    'Less_Than_High_school': 'Lessthanhighschoolgraduate',
    'High_School_or_Equivalent': 'Highschoolgraduate',
    'Some_College': 'Somecollege',
    'Bachelors_or_Higher': 'Bachelors',
    'Under_25000S': 'Undertwentyfive',
    '_25000_to_49999S': 'twentyfivetofourtyninesalary',
    '_50000_to_74999S': 'fiftytoseventyfoursalary',
    '_75000_to_99999S': 'seventyfivetoninetyninesalary',
    'Over_100000S': 'onehundredandoversalary',
    'Over100000S': 'onehundredandoversalary',
})

# Response variables: everything except the first four columns and the last one.
# NOTE(review): presumably the first four are State, County, InsuranceCategory and
# TotalPopulation, as in testdemo — confirm which column the trailing -1 excludes.
anovaColumns = list(healthAnova.columns)[4:-1]

for i in anovaColumns:
    # Only the response column and the two factors are handed to the model
    data19 = healthAnova[[i, 'InsuranceCategory', 'State']]
    health19_lm = ols(f'{i} ~ C(State, Sum)*C(InsuranceCategory, Sum)', data = data19).fit()

    # typ = 2 requests the Type II ANOVA table; only its p-value column is printed
    table = sm.stats.anova_lm(health19_lm, typ = 2)
    print(i, table['PR(>F)'])

Undersixyears C(State, Sum)                              4.905663e-05
C(InsuranceCategory, Sum)                  1.557283e-46
C(State, Sum):C(InsuranceCategory, Sum)    2.681139e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
sixtoeighteenyears C(State, Sum)                              5.527105e-07
C(InsuranceCategory, Sum)                  2.590060e-54
C(State, Sum):C(InsuranceCategory, Sum)    4.808534e-04
Residual                                            NaN
Name: PR(>F), dtype: float64
nineteentotwentyfiveyears C(State, Sum)                              1.349050e-08
C(InsuranceCategory, Sum)                  5.274816e-41
C(State, Sum):C(InsuranceCategory, Sum)    1.125046e-02
Residual                                            NaN
Name: PR(>F), dtype: float64
twentysixtothirtyfouryears C(State, Sum)                              1.539047e-07
C(InsuranceCategory, Sum)                  1.501824e-31
C(State, Sum):C(InsuranceCategory, Sum)    

In [15]:
# ANOVA Test for 2020, testing data
#
# For each selected column of testdemo, fit an OLS model with State,
# InsuranceCategory and their interaction (Sum contrasts) and print the
# p-values of the ANOVA table.

# Give the columns underscore-free names before they are interpolated into the
# model formula.  DataFrame.rename silently skips keys that are not present, and
# testdemo's columns are spelled 'Less_Than_High_school' and 'Over100000S', so
# the original keys 'Less_Than_High_School' / 'Over_100000S' never matched and
# those two columns kept their old names.  Both spellings are mapped here.
healthAnova = testdemo.rename(columns = {
    'Insurance_Category': 'InsuranceCategory',
    'Total_Population': 'TotalPopulation',
    'Under_6Y': 'Undersixyears',
    '_6_to_18Y': 'sixtoeighteenyears',
    '_19_to_25Y': 'nineteentotwentyfiveyears',
    '_26_to_34Y': 'twentysixtothirtyfouryears',
    '_35_to_44Y': 'thirtyfivetofourtyfouryears',
    '_45_to_54Y': 'fourtyfivetofiftyfouryears',
    '_55_to_64Y': 'fiftyfivetosixtyfouryears',
    '_65_to_74Y': 'sixtyfivetoseventyfouryears',
    '_75_and_Older': 'seventyfiveandmoreyears',
    'African_American': 'AfricanAmerican',
    'American_Indian': 'AmericanIndian',
    'Pacific_Islander': 'PacificIslander',
    'Some_Other_Race': 'Someotherrace',
    'Native_Born': 'Nativeborn',
    'Foreign_Born': 'Foreignborn',
    'Not_A_Citizen': 'Notacitizen',
    'Less_Than_High_School': 'Lessthanhighschoolgraduate',
    'Less_Than_High_school': 'Lessthanhighschoolgraduate',
    'High_School_or_Equivalent': 'Highschoolgraduate',
    'Some_College': 'Somecollege',
    'Bachelors_or_Higher': 'Bachelors',
    'Under_25000S': 'Undertwentyfive',
    '_25000_to_49999S': 'twentyfivetofourtyninesalary',
    '_50000_to_74999S': 'fiftytoseventyfoursalary',
    '_75000_to_99999S': 'seventyfivetoninetyninesalary',
    'Over_100000S': 'onehundredandoversalary',
    'Over100000S': 'onehundredandoversalary',
})

# Response variables: every column after the first four (State, County,
# InsuranceCategory, TotalPopulation).  Unlike the cells for the earlier years,
# no trailing column is dropped here.
anovaColumns = list(healthAnova.columns)[4:]

for i in anovaColumns:
    # Only the response column and the two factors are handed to the model
    data20 = healthAnova[[i, 'InsuranceCategory', 'State']]
    testdemo_lm = ols(f'{i} ~ C(State, Sum)*C(InsuranceCategory, Sum)', data = data20).fit()

    # typ = 2 requests the Type II ANOVA table; only its p-value column is printed
    table = sm.stats.anova_lm(testdemo_lm, typ = 2)
    print(i, table['PR(>F)'])

Undersixyears C(State, Sum)                              1.481817e-56
C(InsuranceCategory, Sum)                  1.989341e-64
C(State, Sum):C(InsuranceCategory, Sum)    5.908542e-48
Residual                                            NaN
Name: PR(>F), dtype: float64
sixtoeighteenyears C(State, Sum)                              2.327305e-64
C(InsuranceCategory, Sum)                  5.907771e-68
C(State, Sum):C(InsuranceCategory, Sum)    2.632484e-51
Residual                                            NaN
Name: PR(>F), dtype: float64
nineteentotwentyfiveyears C(State, Sum)                              9.012595e-80
C(InsuranceCategory, Sum)                  4.626843e-49
C(State, Sum):C(InsuranceCategory, Sum)    1.271343e-43
Residual                                            NaN
Name: PR(>F), dtype: float64
twentysixtothirtyfouryears C(State, Sum)                              7.571157e-71
C(InsuranceCategory, Sum)                  3.703876e-38
C(State, Sum):C(InsuranceCategory, Sum)    