In [1]:
import pandas as pd
from vega_datasets import data
import altair as alt
import matplotlib.pyplot as plt
import geopandas

In [2]:
# Load the alcohol-attributable mortality records.
alc_df = pd.read_csv('filtered_alc_mort.csv')

# Derive an integer fatality count from "Data_Value": "-" placeholders and
# missing values are both treated as 0 so the column can be cast to int.
alc_df["Fatalities"] = alc_df["Data_Value"].replace("-", 0).fillna(0).astype(int)

# Preview goes last: only a cell's final expression is displayed, and this way
# the preview also includes the new "Fatalities" column.
alc_df.head()

In [3]:

# Read the state/province boundary shapefile into a GeoDataFrame and preview it.
shapefile_path = "ne_110m_admin_1_states_provinces/ne_110m_admin_1_states_provinces.shp"
state = geopandas.read_file(shapefile_path)
state.head()

Unnamed: 0,featurecla,scalerank,adm1_code,diss_me,iso_3166_2,wikipedia,iso_a2,adm0_sr,name,name_alt,...,FCLASS_ID,FCLASS_PL,FCLASS_GR,FCLASS_IT,FCLASS_NL,FCLASS_SE,FCLASS_BD,FCLASS_UA,FCLASS_TLC,geometry
0,Admin-1 scale rank,2,USA-3514,3514,US-MN,http://en.wikipedia.org/wiki/Minnesota,US,1,Minnesota,MN|Minn.,...,,,,,,,,,,"POLYGON ((-89.95766 47.28691, -90.13175 47.292..."
1,Admin-1 scale rank,2,USA-3515,3515,US-MT,http://en.wikipedia.org/wiki/Montana,US,1,Montana,MT|Mont.,...,,,,,,,,,,"POLYGON ((-116.04823 49.00037, -113.05950 49.0..."
2,Admin-1 scale rank,2,USA-3516,3516,US-ND,http://en.wikipedia.org/wiki/North_Dakota,US,1,North Dakota,ND|N.D.,...,,,,,,,,,,"POLYGON ((-97.22894 49.00089, -97.21414 48.902..."
3,Admin-1 scale rank,2,USA-3517,3517,US-HI,http://en.wikipedia.org/wiki/Hawaii,US,8,Hawaii,HI|Hawaii,...,,,,,,,,,,"MULTIPOLYGON (((-155.93665 19.05939, -155.9080..."
4,Admin-1 scale rank,2,USA-3518,3518,US-ID,http://en.wikipedia.org/wiki/Idaho,US,1,Idaho,ID|Idaho,...,,,,,,,,,,"POLYGON ((-116.04823 49.00037, -115.96780 47.9..."


In [4]:
# Total fatalities per (location, age group).
# The column is selected *before* aggregating: `sum("Fatalities")` would pass
# the string positionally as the `numeric_only` flag rather than choosing a
# column, and the result then had to be picked out by position with iloc.
alc_df_age = alc_df.groupby(['LocationDesc', 'AgeGroup'], as_index=False)[['Fatalities']].sum()

# Distinct age-group labels present before any filtering (set order is arbitrary).
ages = list(set(alc_df_age['AgeGroup']))

# Keep only rows that can be drawn as individual states on the map: drop the
# national aggregate and the District of Columbia, plus any "Under 21" age rows.
excluded_locations = ["District of Columbia", "United States"]
alc_df_age = alc_df_age[
    (alc_df_age["AgeGroup"] != "Under 21")
    & ~alc_df_age["LocationDesc"].isin(excluded_locations)
]

In [18]:
# Age-group labels that remain after filtering.
alc_df_age["AgeGroup"].unique()

array(['0-19', '20-34', '35-49', '50-64', '65+', 'Overall'], dtype=object)

In [19]:
# Attach the per-state fatality rows to the state geometries. The outer join
# keeps rows from either side that have no partner on the other.
full = pd.merge(
    state,
    alc_df_age,
    how='outer',
    left_on='woe_name',
    right_on='LocationDesc',
)
full.head()
# (rows, columns) of the joined frame.
full.shape


(301, 125)

In [20]:
# Discard geometries that found no fatality rows in the outer join.
# Rebinding instead of `inplace=True` keeps the step a plain expression and
# avoids mutating the object produced by the previous cell.
full = full.dropna(subset=['AgeGroup'])

In [21]:
# Load the state population estimates.
pop_df = pd.read_csv('nst-est2019-01.csv')

# Strip the "." characters from the state names so they can be matched against
# the map's state names. The vectorized string method does a literal
# (non-regex) replacement and leaves missing values alone instead of raising.
pop_df['state'] = pop_df['state'].str.replace('.', '', regex=False)
pop_df.head()

Unnamed: 0,state,population
0,Alabama,4863525.0
1,Alaska,741456.0
2,Arizona,6941072.0
3,Arkansas,2989918.0
4,California,39167117.0


In [22]:
# Add each state's population; the inner join keeps only states present in both frames.
# Note: this cell rebinds `full` from itself, so re-running it alone merges
# the population columns a second time.
full = pd.merge(
    full,
    pop_df,
    how='inner',
    left_on='woe_name',
    right_on='state',
)
not_under_21 = full['AgeGroup'].ne('Under 21')
full = full.loc[not_under_21]
full['AgeGroup'].unique()

array(['0-19', '20-34', '35-49', '50-64', '65+', 'Overall'], dtype=object)

In [23]:
# List the merged frame's column names. Note that the population column is
# named 'population ' with a trailing space, which later cells must match exactly.
full.columns

Index(['featurecla', 'scalerank', 'adm1_code', 'diss_me', 'iso_3166_2',
       'wikipedia', 'iso_a2', 'adm0_sr', 'name', 'name_alt',
       ...
       'FCLASS_SE', 'FCLASS_BD', 'FCLASS_UA', 'FCLASS_TLC', 'geometry',
       'LocationDesc', 'AgeGroup', 'Fatalities', 'state', 'population '],
      dtype='object', length=127)

In [24]:
# Fatalities per 1000 residents (despite the column name, the rate is scaled by 1000).
# The population column name includes a trailing space.
fatalities_per_person = full['Fatalities'].div(full['population '])
full['per_capita'] = fatalities_per_person.mul(1000)

In [26]:
# Load the US state boundaries used for the grey background layer
states = alt.topo_feature(data.us_10m.url, 'states')

# Radio buttons for choosing which age group is shown
age_groups = full['AgeGroup'].unique().tolist()
category_dropdown = alt.binding_radio(options=age_groups)

# Start with one age group selected. Without an initial value the selection is
# empty, an empty selection passes every row through the filter, and the shapes
# for all age groups are drawn on top of each other for every state.
default_age_group = 'Overall' if 'Overall' in age_groups else age_groups[0]
selection = alt.selection_point(
    fields=['AgeGroup'],
    bind=category_dropdown,
    name='Select',
    value=[{'AgeGroup': default_age_group}],
)

# Define the base map
base = alt.Chart(states).mark_geoshape(
    fill='#EEE',
    stroke='white'
).properties(
    width=500,
    height=300
).project('albersUsa')

# Define the choropleth map. Only the columns the chart actually uses are
# passed in, so the embedded chart data does not carry every shapefile attribute.
map_columns = ['name', 'AgeGroup', 'per_capita', 'geometry']
choropleth = alt.Chart(full[map_columns]).mark_geoshape().encode(
    color=alt.Color('per_capita:Q', title='Deaths per 1000 People'),
    tooltip=[
        alt.Tooltip('name:N', title='State'),
        alt.Tooltip('per_capita:Q', title='Number of Alcohol Related Deaths per 1000 People')
    ]
).add_params(
    selection
).transform_filter(
    selection
).properties(
    title="Number of Alcohol Related Deaths per 1000 People"
)

# Combine the base map and the choropleth map
us_choropleth = base + choropleth

# Show the map
us_choropleth



In [13]:
# Check the (rows, columns) size of the merged frame.
full.shape

(300, 128)

In [14]:
# Total fatalities per cause of death (despite the "avg_" prefix these are sums).
# The column is selected before aggregating; `sum('Fatalities')` would pass the
# string positionally as the `numeric_only` flag instead of naming a column.
# NOTE(review): alc_df appears to contain aggregate rows as well (a
# "United States" location and an "Overall" age group show up in other cells),
# so these totals may count the same deaths more than once. Confirm.
avg_cause_of_death_df = (
    alc_df.groupby('Cause_of_Death', as_index=False)[['Fatalities']].sum()
)

# The grand-total row is not a cause in its own right and would dwarf the others.
avg_cause_of_death_df = avg_cause_of_death_df[
    avg_cause_of_death_df['Cause_of_Death'] != 'Total for All Causes'
]

In [15]:
# Bar chart of total fatalities for each cause of death.
alt.Chart(avg_cause_of_death_df).mark_bar().encode(
    alt.X('Cause_of_Death:N'),
    alt.Y('Fatalities:Q'),
)

In [16]:
# Build the per-location table *before* charting it. Previously the chart ran
# first and failed with a NameError on a fresh kernel, because the table was
# only defined in a later cell.
# The column is selected before aggregating; `sum('Fatalities')` would pass the
# string positionally as the `numeric_only` flag instead of naming a column.
avg_cause_of_death_df_st = (
    alc_df.groupby(['LocationDesc', 'Cause_of_Death'], as_index=False)[['Fatalities']].sum()
)

# The grand-total row is not a cause in its own right.
avg_cause_of_death_df_st = avg_cause_of_death_df_st[
    avg_cause_of_death_df_st['Cause_of_Death'] != 'Total for All Causes'
]

# Fatalities per cause; bars stack the rows of every location together.
alt.Chart(avg_cause_of_death_df_st).mark_bar().encode(
    x='Cause_of_Death:N',
    y='Fatalities:Q'
)

In [None]:
# Create a dropdown selection over the available locations
location_options = avg_cause_of_death_df_st['LocationDesc'].unique().tolist()
category_dropdown = alt.binding_select(options=location_options)

# Start on a single location. Without an initial value the selection is empty,
# an empty selection passes every row through the filter, and the bars would
# stack all locations together until the user picks one.
fat_selection = alt.selection_point(
    fields=['LocationDesc'],
    bind=category_dropdown,
    name='Select',
    value=[{'LocationDesc': location_options[0]}],
)

# Fatalities per cause of death for the selected location
alt.Chart(avg_cause_of_death_df_st).mark_bar().encode(
    x='Cause_of_Death:N',
    y='Fatalities:Q'
).add_params(
    fat_selection
).transform_filter(
    fat_selection
).properties(
    title="Distribution of Number of Alcohol Attributable Deaths by Type"
)