In [None]:
import plotly
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as offline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import pandas as pd

In [None]:
# xlrd is the engine pandas.read_excel relies on for the legacy .xls workbooks read later in this notebook.
!pip install xlrd

## Supplemental Nutrition Assistance Program (SNAP) Data Analysis
### What is SNAP?
SNAP is a US Federal government funded program that provides assistance with food budgets to families in need. By examining the costs to run this program, we can discover trends associated with the economy, as well as the relation of costs to poverty levels in the country. We will also examine if SNAP benefits have kept up with the ever-increasing cost of food in the US. 

First, let's look at the data by state, to see which states spend the most on SNAP. Data includes the cost of administration of the program, which is estimated to be about 8% of the total cost. ([Source](https://www.cbpp.org/92-percent-of-federal-snap-spending-is-for-food)) The chart below shows how much each state spent from 2010 to 2019. 

In [None]:
init_notebook_mode(connected=True)

# Names of the US regions which are also sheet titles
regions = ['NERO', 'MARO', 'SERO', 'MWRO', 'SWRO', 'MPRO', 'WRO']
# NOTE(review): these two lists are not referenced by any visible cell; they are
# kept only in case a cell outside this view relies on them.
regions_len1 = [7, 8, 8, 6, 5, 10, 9]
regions_len2 = [8, 7, 8, 7, 7, 8, 8]
file_suffix = '-cleaned.xls'

DATA_DIR = '../input/d/sbengali/publicassistance/'
FIRST_FISCAL_YEAR = 10       # two-digit year used in the file names (FY10 ... FY20)
LAST_FISCAL_YEAR = 20
ROWS_PER_STATE = 14          # rows occupied by one state's block on a sheet
TOTAL_ROW_OFFSET = 13        # last row of a block; presumably the fiscal-year total
MAX_STATES_PER_REGION = 10   # upper bound on state blocks scanned per sheet


def _state_totals(sheet_df, fiscal_year, blank_rows):
    """Collect one record per state block found on a single region sheet.

    Each block is ROWS_PER_STATE rows long and is followed by ``blank_rows``
    separator rows. The state name is read from the 'Fiscal Year and Month'
    column of the block's first row, and the values come from the last five
    columns of the block's final row.

    Parameters
    ----------
    sheet_df : pandas.DataFrame
        One region sheet as returned by ``pd.read_excel``.
    fiscal_year : int
        Four-digit fiscal year stored with every record.
    blank_rows : int
        Number of separator rows between consecutive state blocks.

    Returns
    -------
    list of list
        ``[state, fiscal_year, v1, ..., v5]`` for every complete block.
    """
    value_columns = sheet_df.iloc[:, -5:]
    stride = ROWS_PER_STATE + blank_rows
    rows = []
    for block in range(MAX_STATES_PER_REGION):
        begin = block * stride
        # Require the whole block to exist, not just its first row, so a
        # truncated trailing block is skipped instead of raising IndexError.
        if begin + TOTAL_ROW_OFFSET >= len(sheet_df):
            break
        state = sheet_df.iloc[begin]['Fiscal Year and Month']
        totals = value_columns.iloc[begin + TOTAL_ROW_OFFSET].tolist()
        rows.append([state, fiscal_year] + totals)
    return rows


# Create a dataframe containing the totals for all the states over each fiscal year of data
records = []
new_columns = None
for year in range(FIRST_FISCAL_YEAR, LAST_FISCAL_YEAR + 1):
    file_name = DATA_DIR + 'FY' + str(year) + file_suffix
    temp_df = pd.read_excel(file_name, sheet_name=regions)
    if new_columns is None:
        # The first workbook supplies the column headers, so it is not read twice.
        snap_init_df = temp_df
        columns = ['State', 'Fiscal Year']
        columns.extend(snap_init_df['NERO'].columns[1:].tolist())
        new_columns = [c.strip() for c in columns]
    # Files for fiscal years 2010-14 have two blank lines between states' records;
    # later files have one.
    blank_rows = 2 if year < 15 else 1
    for region in regions:
        records.extend(_state_totals(temp_df[region], 2000 + year, blank_rows))

# Build the frame once rather than appending row by row inside the loops
all_states_df = pd.DataFrame(records, columns=new_columns)

In [None]:
# Load the state name / abbreviation lookup that the map plots need,
# renaming its columns to match the names used in the SNAP frame.
state_abbrv_df = (
    pd.read_json('../input/d/sbengali/publicassistance/state-abbrvs.json')
    .rename(columns={'abbreviation': 'State-Code', 'name': 'State'})
    # North Carolina is left out of the lookup
    .loc[lambda frame: frame['State'] != 'North Carolina']
    # Normalise the capitalisation of "of" in the District's name
    .replace(['District Of Columbia'], 'District of Columbia')
    .reset_index(drop=True)
)

In [None]:
# States / territories that can't be plotted
drop_states = ['North Carolina', 'Guam', 'Virgin Islands']

# Rows whose "state" is actually a region title
is_region_row = all_states_df['State'].isin(regions)
is_unplottable = all_states_df['State'].isin(drop_states)
# 2020 data is incomplete
is_incomplete_year = all_states_df['Fiscal Year'] == 2020

# Keep only rows that match none of the exclusion rules, then renumber the index
rows_to_discard = is_region_row | is_unplottable | is_incomplete_year
all_states_df = all_states_df[~rows_to_discard].reset_index(drop=True)

In [None]:
# Attach the abbreviation columns to every state row; the inner join keeps
# only states that appear in both frames.
all_states_df = all_states_df.merge(state_abbrv_df, how='inner', on='State')

In [None]:
# One bar trace per fiscal year. Every trace lives in the same figure and the
# slider toggles which single trace is visible.
fiscal_years = sorted(all_states_df['Fiscal Year'].unique())
traces = []
for year in fiscal_years:
    trace_df = all_states_df[all_states_df['Fiscal Year'] == year]
    trace = dict(
        type='bar',
        visible=False,
        x=trace_df['State'],
        y=trace_df['Cost'],
        marker=dict(color='#157612'),
        width=0.5,
        showlegend=False,
        hovertemplate=
        "<b>%{x}</b><br><br>"
        "<b>$%{y}</b><br><br>"
        "<extra></extra>",
    )
    traces.append(trace)

# Show the first fiscal year until the slider is moved
traces[0]['visible'] = True

# Create the slider steps. Each label is taken from the fiscal year the trace
# was built from, so labels and traces cannot drift apart if the years are not
# contiguous or do not start at 2010.
steps_bar = []
for i, year in enumerate(fiscal_years):
    visibility = [False] * len(traces)
    visibility[i] = True
    steps_bar.append(dict(
        method="restyle",
        args=['visible', visibility],
        label='<b>Fiscal Year {}</b>'.format(int(year))))

# Create the sliders from the steps
sliders_bar = [dict(active=0, pad={"t": 90}, steps=steps_bar)]
layout_bar = go.Layout(width=900, height=700, title='Yearly Total SNAP Cost by State')
fig_bar = go.Figure(data=traces, layout=layout_bar)
fig_bar.update_layout(
    sliders=sliders_bar,
    xaxis_tickangle=-45,
    yaxis_title='Total SNAP Cost',
    title_x=0.5,
    margin=dict(l=20, r=20, t=40, b=20))
# Fix the y-range across all years so bar heights are comparable between slider steps
fig_bar.update_yaxes(range=[all_states_df['Cost'].min(), all_states_df['Cost'].max()])
plotly.offline.iplot(fig_bar)

By filtering the data for different fiscal years, we can see which states spend more than others. (Note that North Carolina data is not visible because it was not made available to the USDA.) Costs can be affected by major events such as natural disasters. For example, Florida's SNAP costs jumped in 2018, when residents were affected by hurricanes. Out of all the states, California consistently spent the most on SNAP over the period shown above.

In [None]:
# Load the state population table, rename its key columns to match the SNAP
# frame, and order the rows by state code.
pop_df = (
    pd.read_csv('../input/d/sbengali/publicassistance/state-populations.csv')
    .rename(columns={'State': 'State-Code'})
    .sort_values(by=['State-Code'], ascending=True, axis=0)
    .rename(columns={'Year': 'Fiscal Year'})
)

In [None]:
# Bring in each state's population for the matching fiscal year
# (default inner join on the two shared key columns).
all_states_df = pd.merge(all_states_df, pop_df, on=['State-Code', 'Fiscal Year'])

In [None]:
# Share of each state's residents using SNAP benefits, expressed as a percentage.
# The divisor is passed as a plain array so the division is positional.
participation_ratio = all_states_df['Persons Participation'].div(all_states_df['Population'].values)
all_states_df['Percentage Participation'] = 100 * participation_ratio

Let's take a look at the percentage of the population of each state that participates in SNAP. This percentage changes over the years 2010 to 2019, dropping to a relative low in 2015, before rising again. 

In [None]:
# Order the rows alphabetically by state so every trace lists states the same way
all_states_df = all_states_df.sort_values(by=['State'], ascending=True, axis=0)

# One bar trace per fiscal year, in ascending year order. The years are sorted
# explicitly because the state sort above does not guarantee that they are
# first encountered in chronological order.
fiscal_years = sorted(all_states_df['Fiscal Year'].unique())
traces = []
for year in fiscal_years:
    # Filtering keeps the alphabetical state order established above
    trace_df = all_states_df[all_states_df['Fiscal Year'] == year]
    trace = dict(
        type='bar',
        visible=False,
        x=trace_df['State'],
        y=trace_df['Percentage Participation'],
        marker=dict(color='#18329B'),
        width=0.5,
        showlegend=False,
        hovertemplate=
        "<b>%{x}</b><br><br>"
        "<b>%{y:.2f}%</b><br><br>"
        "<extra></extra>",
    )
    traces.append(trace)

# Show the first fiscal year until the slider is moved
traces[0]['visible'] = True

# Create the slider steps. Each label is taken from the fiscal year the trace
# was built from rather than from the loop index, so a label can never point
# at the wrong year's data.
steps_bar = []
for i, year in enumerate(fiscal_years):
    visibility = [False] * len(traces)
    visibility[i] = True
    steps_bar.append(dict(
        method="restyle",
        args=['visible', visibility],
        label='<b>Fiscal Year {}</b>'.format(int(year))))

# Create the sliders from the steps
sliders_bar = [dict(active=0, pad={"t": 90}, steps=steps_bar)]
layout_bar = go.Layout(width=1000, height=700, title='Yearly Percentage SNAP Participation by State')
fig_bar = go.Figure(data=traces, layout=layout_bar)
fig_bar.update_layout(
    sliders=sliders_bar,
    xaxis_tickangle=-45,
    yaxis_title='Percentage Participation',
    title_x=0.5,
    margin=dict(l=20, r=45, t=40, b=20))
# Fixed y-range shared by all slider steps; the upper bound of 30 is hard-coded,
# so any value above 30% would be clipped.
fig_bar.update_yaxes(range=[all_states_df['Percentage Participation'].min(), 30])
plotly.offline.iplot(fig_bar)

## SNAP Costs and Poverty Levels:
National participation by needy families in SNAP closely mirrors the program's costs. We can see in the next two graphs how the number of families that meet the poverty threshold (as [defined](https://www.census.gov/data/tables/time-series/demo/income-poverty/historical-poverty-thresholds.html) by the US Census Bureau) and the number of SNAP participants both follow the trend in total SNAP costs.

In [None]:
# Load the national SNAP history and make every column numeric *before* scaling,
# so a column parsed as text can never be "multiplied" by string repetition.
snap_hist_df = pd.read_csv('../input/d/sbengali/publicassistance/SNAP_history_1969_2019.csv')
snap_hist_df = snap_hist_df.astype(float)
# Presumably stored in millions of dollars (per the "(M)" in the column name): scale up to dollars
snap_hist_df['Total Costs(M)'] = 1000000 * snap_hist_df['Total Costs(M)']
# Presumably stored in thousands of people: scale up to individual participants
snap_hist_df['Average Participation'] = 1000 * snap_hist_df['Average Participation']

pov_hist_df = pd.read_csv('../input/d/sbengali/publicassistance/historical-poverty-data-1959-2019.csv')

# Drop poverty data prior to 1969 to match with SNAP data range
pov_hist_df = pov_hist_df[~(pov_hist_df['Year'] < 1969)]
pov_hist_df = pov_hist_df.astype(float)
# Presumably stored in thousands of families: scale up to individual families
pov_hist_df['Number of poor families'] = 1000 * pov_hist_df['Number of poor families']

In [None]:
# Plot total costs and number of poor families side by side
fig2 = plotly.subplots.make_subplots(rows=1, cols=2)
fig2.add_trace(go.Scatter(x=snap_hist_df['Fiscal Year'].values, y=snap_hist_df['Total Costs(M)'].values,
                          name='SNAP Cost', line=dict(color="#1E1EB1")), row=1, col=1)
fig2.add_trace(go.Scatter(x=pov_hist_df['Year'].values, y=pov_hist_df['Number of poor families'].values,
                          name='Number of Families', line=dict(color='#CD2424')), row=1, col=2)

# Settings shared by both subplots
fig2.update_xaxes(tick0=1969, dtick=10)
fig2.update_yaxes(rangemode="tozero")
fig2.update_layout(
    title='Total SNAP Costs and Families at the Poverty Threshold',
    title_x=0.5,
    margin=dict(l=10, r=10, t=40, b=20),
    width=1200,
    height=500,
    showlegend=False,
)

# Left axis is a dollar amount, so only this axis gets the "$" tick prefix
fig2.update_yaxes(
    title_text='Total SNAP Cost',
    tickprefix="$",
    tickfont=dict(
        color='#1E1EB1'
    ),
    row=1,
    col=1
)
# Right axis is a count of families and must not be labelled as currency
fig2.update_yaxes(
    title_text='Number of Families',
    tickfont=dict(
        color='#CD2424'
    ),
    row=1,
    col=2
)

fig2.show()

In [None]:
# Plot total costs and average participation in SNAP side by side
fig3 = plotly.subplots.make_subplots(rows=1, cols=2)
fig3.add_trace(go.Scatter(x=snap_hist_df['Fiscal Year'].values, y=snap_hist_df['Total Costs(M)'].values,
                          name='SNAP Cost', line=dict(color="#1E1EB1")), row=1, col=1)
fig3.add_trace(go.Scatter(x=snap_hist_df['Fiscal Year'].values, y=snap_hist_df['Average Participation'].values,
                          name='Number of Participants in SNAP', line=dict(color='#C112B1')), row=1, col=2)

# Settings shared by both subplots
fig3.update_xaxes(tick0=1969, dtick=10)
fig3.update_yaxes(rangemode="tozero")
fig3.update_layout(
    title='Total SNAP Costs and Participants in SNAP',
    title_x=0.5,
    margin=dict(l=10, r=10, t=40, b=20),
    width=1200,
    height=500,
    showlegend=False,
)

# Left axis is a dollar amount, so only this axis gets the "$" tick prefix
fig3.update_yaxes(
    title_text='Total SNAP Cost',
    tickprefix="$",
    tickfont=dict(
        color='#1E1EB1'
    ),
    row=1,
    col=1
)
# Right axis is a head count and must not be labelled as currency
fig3.update_yaxes(
    title_text='Number of Participants',
    tickfont=dict(
        color='#C112B1'
    ),
    row=1,
    col=2
)

fig3.show()

## SNAP Benefits and the Cost of Food:
Next, let's take a look at how much the average monthly SNAP benefit to each person has changed over the years.

In [None]:
# Two panels: total SNAP cost (left) and average monthly benefit per person (right)
fiscal_year_values = snap_hist_df['Fiscal Year'].values
cost_trace = go.Scatter(
    x=fiscal_year_values,
    y=snap_hist_df['Total Costs(M)'].values,
    name='SNAP Cost',
    line={'color': "#1E1EB1"},
)
benefit_trace = go.Scatter(
    x=fiscal_year_values,
    y=snap_hist_df['Average Benefit Per Person'].values,
    name='Monthly Benefit Per Participant',
    line={'color': '#53A729'},
)

fig4 = plotly.subplots.make_subplots(rows=1, cols=2)
fig4.add_trace(cost_trace, row=1, col=1)
fig4.add_trace(benefit_trace, row=1, col=2)

# Both panels show dollar amounts, so the currency prefix applies to every y-axis
fig4.update_yaxes(rangemode="tozero", tickprefix="$")
fig4.update_xaxes(tick0=1969, dtick=10)
fig4.update_layout(
    title='Total SNAP Costs and Monthly SNAP Benefit per Participant',
    title_x=0.5,
    margin={'l': 10, 'r': 10, 't': 40, 'b': 20},
    width=1200,
    height=500,
    showlegend=False,
)

# Per-panel axis titles, with tick labels coloured to match each line
fig4.update_yaxes(title_text='Total SNAP Cost', tickfont={'color': '#1E1EB1'}, row=1, col=1)
fig4.update_yaxes(title_text='Monthly Benefit', tickfont={'color': '#53A729'}, row=1, col=2)

fig4.show()

We can see that the average monthly SNAP benefit per person has gone up over time along with total SNAP costs, but this brings up another question - how does this relate to the average cost of food which has also risen over the years?

In [None]:
# Overlay the nationwide monthly food expense per person and the monthly SNAP
# benefit per participant on a single set of axes.
food_hist_df = pd.read_csv('../input/d/sbengali/publicassistance/historical-food-expense.csv').astype(float)

food_expense_trace = go.Scatter(
    x=food_hist_df['Year'].values,
    y=food_hist_df['Monthly expense per person'].values,
    name='Monthly Food Expense Per Person Nationwide',
    line={'color': "#661BCB"},
)
snap_benefit_trace = go.Scatter(
    x=snap_hist_df['Fiscal Year'].values,
    y=snap_hist_df['Average Benefit Per Person'].values,
    name='Monthly SNAP Benefit Per Participant',
    line={'color': '#53A729'},
)

fig5 = go.Figure()
fig5.add_trace(food_expense_trace)
fig5.add_trace(snap_benefit_trace)

# Both series are dollar amounts measured from zero
fig5.update_yaxes(rangemode="tozero", tickprefix="$")
fig5.update_xaxes(dtick=4, nticks=15)
fig5.update_layout(
    title='Monthly Food Expense Nationwide and Monthly SNAP Benefit Per Participant',
    title_x=0.51,
    # Legend is anchored by its top-right corner inside the plot area
    legend={'yanchor': "top", 'y': 0.99, 'xanchor': "right", 'x': 0.47},
    margin={'l': 20, 'r': 20, 't': 40, 'b': 20},
    width=800,
    height=600,
)

fig5.show()

The graph above shows us that over time, the monthly food expense per person in the US has risen more quickly than the monthly SNAP benefit per person. This makes it harder to buy food using SNAP alone, without any supplemental income.

## Conclusion:
In 2019, SNAP benefits covered only about 28% of the cost of a month's worth of food, compared with about 34% in 1996. Clearly, in order for needy families to have access to a healthy and balanced diet, an increase in the monthly SNAP benefit would be extremely beneficial.