In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## The Affordable Care Act (ACA) is the name for the comprehensive health care reform law and its amendments which addresses health insurance coverage, health care costs, and preventive care.
### This dataset tries to answer a few questions along the way.

### Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Load dataset and reveal first 5 rows

In [None]:
# Read the state-level table from the Kaggle input directory into a DataFrame.
states_csv_path = '/kaggle/input/health-insurance/states.csv'
df = pd.read_csv(states_csv_path)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(5)

## Checking to see if there are any nulls present

In [None]:
# Count the missing values in each column.
df.isnull().sum()

## Check each column's data type to decide which replacement is needed (mode, median, or mean)

In [None]:
# Print a summary of the frame: each column's dtype and non-null count.
df.info()

In [None]:
# Tally each distinct value in the expansion column; the most frequent one is the mode.
expansion_col = 'State Medicaid Expansion (2016)'
df[expansion_col].value_counts()

### Replace the null with the mode

In [None]:
# Fill missing expansion flags with the column's most frequent value.
# The mode is computed from the data (so it has the same type as the existing
# values, rather than a hard-coded string), and the assignment goes through
# df.loc because an in-place replace on a column selection does not update
# `df` when pandas Copy-on-Write is active.
expansion_col = 'State Medicaid Expansion (2016)'
expansion_mode = df[expansion_col].mode().iloc[0]
df.loc[df[expansion_col].isna(), expansion_col] = expansion_mode

### Repeat the process for the other columns with nulls

In [None]:
# Show the mean of the 2013 Medicaid enrollment column (a float column).
enrollment_2013 = df['Medicaid Enrollment (2013)']
enrollment_2013.mean()

In [None]:
# Fill missing 2013 enrollment values with the column mean.
# The mean is computed here instead of being pasted in as a rounded literal,
# so it stays correct if the data changes, and the result is assigned back to
# the column rather than relying on a chained in-place replace.
enrollment_2013_col = 'Medicaid Enrollment (2013)'
df[enrollment_2013_col] = df[enrollment_2013_col].fillna(df[enrollment_2013_col].mean())

In [None]:
# Show the mean of the 2013-2016 Medicaid enrollment change column.
enrollment_change = df['Medicaid Enrollment Change (2013-2016)']
enrollment_change.mean()

In [None]:
# Fill missing enrollment-change values with the column mean.
# The mean is computed here instead of being pasted in as a rounded literal,
# so it stays correct if the data changes, and the result is assigned back to
# the column rather than relying on a chained in-place replace.
enrollment_change_col = 'Medicaid Enrollment Change (2013-2016)'
df[enrollment_change_col] = df[enrollment_change_col].fillna(df[enrollment_change_col].mean())

In [None]:
# Re-count missing values per column to confirm none remain.
df.isnull().sum()

### Replace True and False with 1 and 0 for ease of graphing

In [None]:
# Encode the expansion flag as 1 (expanded) / 0 (not expanded).
# Both boolean and string spellings are mapped, since the column may hold
# either depending on how pandas parsed it. False maps to 0 (the previous
# code mapped 'False' to 1, collapsing both categories into one).
# Values not listed in the mapping (including NaN) come out as NaN.
expansion_col = 'State Medicaid Expansion (2016)'
expansion_codes = {True: 1, False: 0, 'True': 1, 'False': 0}
df[expansion_col] = df[expansion_col].map(expansion_codes)

### The first 3 columns are being read as objects. 
1. ### Remove the percentage sign from the first 3 columns

In [None]:

# Strip the trailing percent sign (and any surrounding whitespace) from the
# 2010 and 2015 uninsured-rate columns. Unlike slicing off the last character,
# this only removes '%' itself, so re-running the cell while the columns still
# hold strings cannot chop off digits, and missing values stay NaN instead of
# raising.
for rate_col in ('Uninsured Rate (2010)', 'Uninsured Rate (2015)'):
    df[rate_col] = df[rate_col].str.strip().str.rstrip('%')

In [None]:
# Show the first five rows to check the rate columns.
df.head(5)

In [None]:
# Strip the trailing percent sign (and any surrounding whitespace) from the
# rate-change column. Only '%' is removed, so re-running the cell while the
# column still holds strings cannot chop off digits, and missing values stay
# NaN instead of raising.
rate_change_col = 'Uninsured Rate Change (2010-2015)'
df[rate_change_col] = df[rate_change_col].str.strip().str.rstrip('%')

In [None]:
# Show the first five rows to check the rate-change column.
df.head(5)

2. ### And convert them from strings (object dtype) to float

In [None]:
# Cast the three uninsured-rate columns to floating point.
rate_columns = (
    'Uninsured Rate (2010)',
    'Uninsured Rate (2015)',
    'Uninsured Rate Change (2010-2015)',
)
for rate_col in rate_columns:
    df[rate_col] = df[rate_col].astype(float)

## How has the Affordable Care Act changed the rate of citizens with health insurance coverage?
### It has increased the number of people who are insured

In [None]:
# Show the first five rows of the frame.
df.head(5)

# Which states observed the greatest decline in their uninsured rate? 
### California, Nevada and Oregon. Kentucky and West Virginia also follow after that. 

In [None]:
# Bar chart of the 2010-2015 uninsured-rate change, one bar per state.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(data=df, x='Uninsured Rate Change (2010-2015)', y='State', ax=ax)
                    

#  Did those states expand Medicaid program coverage and/or implement a health insurance marketplace?
### California had both an increase in Employment enrollment and Medicaid expansion
### Nevada and Oregon can mostly be explained by having a Medicaid Expansion. Kentucky and West Virginia also have that.


In [None]:
# Bar chart of 2015 employer health insurance coverage, one bar per state.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=df, x='Employer Health Insurance Coverage (2015)', y='State', ax=ax)

In [None]:

# Swarm plot placing each state against its Medicaid expansion flag.
# catplot is figure-level, so its size is set through `height`.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
sns.catplot(
    data=df,
    x='State Medicaid Expansion (2016)',
    y='State',
    kind='swarm',
    height=8,
)

## What do you predict will happen to the nationwide uninsured rate in the next five years?
### If everything stays the way it is, it should follow the linear projection below and continue to decrease

In [None]:
# Scatter of each row's 2010 uninsured rate against its 2015 uninsured rate.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(data=df, x='Uninsured Rate (2010)', y='Uninsured Rate (2015)', ax=ax)

### This graph reveals more people are enrolled on health insurance on average

In [None]:
 
# Scatter of the 2010-2015 health insurance coverage change against state.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(data=df, x='Health Insurance Coverage Change (2010-2015)', y='State', ax=ax)

## In 2016, health insurance coverage, overall, went up; some states show a more significant increase than others: Texas, California, Florida

In [None]:
# Bar chart of 2016 marketplace health insurance coverage, one bar per state.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=df, x='Marketplace Health Insurance Coverage (2016)', y='State', ax=ax)



# The higher the marketplace insurance coverage, the more tax credits the states get

In [None]:
# Bar chart of 2016 marketplace tax credits against 2016 marketplace coverage.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
# NOTE(review): both axes are numeric counts, so a scatterplot would likely
# show this relationship more readably than bars; chart type kept as-is.
fig, ax = plt.subplots(figsize=(30, 30))
sns.barplot(
    data=df,
    x='Marketplace Health Insurance Coverage (2016)',
    y='Marketplace Tax Credits (2016)',
    ax=ax,
)

### In 2016, the Marketplace increase in health insurance is not due to State Medicaid expansion

In [None]:
# Box plot of 2016 marketplace coverage grouped by the Medicaid expansion flag.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
a_plot = sns.boxplot(
    data=df,
    x='State Medicaid Expansion (2016)',
    y='Marketplace Health Insurance Coverage (2016)',
)
# Pad the x axis around the boxes and cap the y axis at one million.
a_plot.set(xlim=(-1, 2), ylim=(0, 1000000))


### Medicaid Enrollment increased slightly in 2016 from 2013

In [None]:
# Bar chart of 2016 Medicaid enrollment against 2013 Medicaid enrollment.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
# NOTE(review): both axes are numeric counts, so a scatterplot would likely
# show this comparison more readably than bars; chart type kept as-is.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=df, x='Medicaid Enrollment (2013)', y='Medicaid Enrollment (2016)', ax=ax)

### On average Medicaid enrollment across all states in 2016 increased; California being the biggest 

In [None]:
# Bar chart of the 2013-2016 Medicaid enrollment change, one bar per state.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=df, x='Medicaid Enrollment Change (2013-2016)', y='State', ax=ax)

## Comparing Medicare enrollment in 2016 versus Medicaid. I'll modify the limits of the x and y axes, so I can see the points better

In [None]:
# Scatter of 2016 Medicaid enrollment against 2016 Medicare enrollment,
# drawn with default axis limits.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
sns.scatterplot(data=df, x='Medicaid Enrollment (2016)', y='Medicare Enrollment (2016)')

## Modifying the limits...
### The graph shows an almost one-to-one enrollment, but it looks like Medicaid enrollment is slightly more than medicare in 2016.

In [None]:
# Same Medicaid-vs-Medicare scatter, with both axes limited to 0-5,000,000.
# seaborn 0.12+ accepts only `data` positionally, so x and y are passed by
# keyword (the positional form raises a TypeError there).
a_plot = sns.scatterplot(
    data=df,
    x='Medicaid Enrollment (2016)',
    y='Medicare Enrollment (2016)',
)
a_plot.set(xlim=(0, 5000000), ylim=(0, 5000000))

