# Import Dependencies

In [1]:
import pandas as pd

# Import Dataset

In [12]:
# Read the County Health Rankings CSV from the working directory into a
# DataFrame; the bare name on the last line renders a preview of it.
data = pd.read_csv(filepath_or_buffer='County_Health_Rankings.csv')
data

Unnamed: 0,State,County,State code,County code,Year span,Measure name,Measure id,Numerator,Denominator,Raw value,Confidence Interval Lower Bound,Confidence Interval Upper Bound,Data Release Year,fipscode
0,US,United States,0.0,0.0,2003-2005,Violent crime rate,43.0,1328750.667,274877117.0,483.398066,,,,0.0
1,US,United States,0.0,0.0,2004-2006,Violent crime rate,43.0,1340928.667,277612778.5,483.021233,,,,0.0
2,US,United States,0.0,0.0,2005-2007,Violent crime rate,43.0,1355853.167,280407694.7,483.529230,,,2010.0,0.0
3,US,United States,0.0,0.0,2006-2008,Violent crime rate,43.0,1366928.333,287614567.7,475.263942,,,2011.0,0.0
4,US,United States,0.0,0.0,2007-2009,Violent crime rate,43.0,1339439.333,292576281.2,457.808585,,,2012.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
303859,WY,Weston County,56.0,45.0,2007,Daily fine particulate matter,125.0,,,9.370000,,,,56045.0
303860,WY,Weston County,56.0,45.0,2008,Daily fine particulate matter,125.0,,,7.450000,,,2013.0,56045.0
303861,WY,Weston County,56.0,45.0,2009,Daily fine particulate matter,125.0,,,12.770000,,,,56045.0
303862,WY,Weston County,56.0,45.0,2010,Daily fine particulate matter,125.0,,,12.110000,,,,56045.0


# Explore Dataset

### State Column

In [28]:
# Count the distinct State values. Missing values (NaN) are excluded from the
# count, so this can be one less than the number of entries `.unique()` lists.
num_states = data['State'].nunique(dropna=True)

print(f'There are {num_states} unique values in the State column')

There are 53 unique values in the State column


In [23]:
# List every distinct value in the State column (NaN is included in this listing).
data.loc[:, 'State'].unique()

array(['US', 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL',
       'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
       'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM',
       'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN',
       'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR', nan],
      dtype=object)

### County Column

In [29]:
# Count the distinct County values; missing values (NaN) are not counted.
num_counties = data['County'].nunique(dropna=True)
print(f'There are {num_counties} unique values in the County column')

There are 2008 unique values in the County column


In [30]:
# List every distinct value in the County column (NaN is included in this listing).
data.loc[:, 'County'].unique()

array(['United States', 'Alabama', 'Autauga County', ..., nan,
       'Prince of Wales-Outer Ketchikan',
       'Skagway-Hoonah-Angoon Census Ar'], dtype=object)

### Measure Name

In [32]:
# Count the distinct measure names; missing values (NaN) are not counted.
num_measures = data['Measure name'].nunique(dropna=True)
print(f'There are {num_measures} unique values in the Measure Name column')

There are 12 unique values in the Measure Name column


In [33]:
# List every distinct value in the Measure name column (NaN is included in this listing).
data.loc[:, 'Measure name'].unique()

array(['Violent crime rate', 'Unemployment', 'Children in poverty', nan,
       'Diabetic screening', 'Mammography screening',
       'Preventable hospital stays', 'Uninsured',
       'Sexually transmitted infections', 'Physical inactivity',
       'Adult obesity', 'Premature Death',
       'Daily fine particulate matter'], dtype=object)

## Clean up

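For simplicity's sake, I will limit the analysis to US states by dropping Puerto Rico (`PR`) and any rows with a missing `State`. Note that the national `US` aggregate rows and `DC` are still present after this step, so the result is not strictly the 50 states.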

In [43]:
# Remove Puerto Rico ("PR") rows, then remove rows whose State is missing.
# Note: nothing here filters out the 'US' aggregate or 'DC' values seen in
# the State column, so those rows remain in states_df.
states_df = (
    data
    .loc[data['State'].ne("PR")]
    .dropna(subset=['State'])
)
states_df

Unnamed: 0,State,County,State code,County code,Year span,Measure name,Measure id,Numerator,Denominator,Raw value,Confidence Interval Lower Bound,Confidence Interval Upper Bound,Data Release Year,fipscode
0,US,United States,0.0,0.0,2003-2005,Violent crime rate,43.0,1328750.667,274877117.0,483.398066,,,,0.0
1,US,United States,0.0,0.0,2004-2006,Violent crime rate,43.0,1340928.667,277612778.5,483.021233,,,,0.0
2,US,United States,0.0,0.0,2005-2007,Violent crime rate,43.0,1355853.167,280407694.7,483.529230,,,2010.0,0.0
3,US,United States,0.0,0.0,2006-2008,Violent crime rate,43.0,1366928.333,287614567.7,475.263942,,,2011.0,0.0
4,US,United States,0.0,0.0,2007-2009,Violent crime rate,43.0,1339439.333,292576281.2,457.808585,,,2012.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
303859,WY,Weston County,56.0,45.0,2007,Daily fine particulate matter,125.0,,,9.370000,,,,56045.0
303860,WY,Weston County,56.0,45.0,2008,Daily fine particulate matter,125.0,,,7.450000,,,2013.0,56045.0
303861,WY,Weston County,56.0,45.0,2009,Daily fine particulate matter,125.0,,,12.770000,,,,56045.0
303862,WY,Weston County,56.0,45.0,2010,Daily fine particulate matter,125.0,,,12.110000,,,,56045.0


In [70]:
# Report how many rows the State filter removed from the original dataset.
num_dropped = len(data) - len(states_df)
print(f'Number of rows in original dataset: {len(data):,}')
print(f'Number of rows in trimmed dataset: {len(states_df):,}')
print(f'Number of rows dropped: {num_dropped}')


# check that correct number of rows were dropped:
# only "PR" rows and rows with a missing State should be gone
num_PR = len(data.loc[data['State']=="PR"])
num_nan = data['State'].isna().sum()
num_expected = num_PR + num_nan

print(f'Number of dropped rows should be: {num_expected}')

# Enforce the check instead of relying on a visual comparison of the two
# printed numbers, so a Restart & Run All stops here if the filter changes.
assert num_dropped == num_expected, (
    f'Dropped {num_dropped} rows but expected {num_expected} '
    '(PR rows + rows with missing State)'
)

Number of rows in original dataset: 303,864
Number of rows in trimmed dataset: 303,305
Number of rows dropped: 559
Number of dropped rows should be: 559


## Break down by Measures

## Look at Violent Crime Rates

In [71]:
# Keep only the rows of states_df whose measure is the violent crime rate;
# rows with a missing measure name never match the comparison.
crime_df = states_df[states_df['Measure name'].eq("Violent crime rate")]
crime_df

Unnamed: 0,State,County,State code,County code,Year span,Measure name,Measure id,Numerator,Denominator,Raw value,Confidence Interval Lower Bound,Confidence Interval Upper Bound,Data Release Year,fipscode
0,US,United States,0.0,0.0,2003-2005,Violent crime rate,43.0,1.328751e+06,2.748771e+08,483.398066,,,,0.0
1,US,United States,0.0,0.0,2004-2006,Violent crime rate,43.0,1.340929e+06,2.776128e+08,483.021233,,,,0.0
2,US,United States,0.0,0.0,2005-2007,Violent crime rate,43.0,1.355853e+06,2.804077e+08,483.529230,,,2010.0,0.0
3,US,United States,0.0,0.0,2006-2008,Violent crime rate,43.0,1.366928e+06,2.876146e+08,475.263942,,,2011.0,0.0
4,US,United States,0.0,0.0,2007-2009,Violent crime rate,43.0,1.339439e+06,2.925763e+08,457.808585,,,2012.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22346,WY,Weston County,56.0,45.0,2005-2007,Violent crime rate,43.0,1.000000e+01,5.885000e+03,169.923534,,,2010.0,56045.0
22347,WY,Weston County,56.0,45.0,2006-2008,Violent crime rate,43.0,1.100000e+01,5.970333e+03,184.244319,,,2011.0,56045.0
22348,WY,Weston County,56.0,45.0,2007-2009,Violent crime rate,43.0,1.133333e+01,6.093333e+03,185.995624,,,2012.0,56045.0
22349,WY,Weston County,56.0,45.0,2008-2010,Violent crime rate,43.0,8.333333e+00,6.141000e+03,135.699940,,,2013.0,56045.0
