In [1]:
import requests
import json
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress
import scipy.stats as stats
from scipy.stats import ttest_ind, f_oneway
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [2]:
# Paths of the per-topic CSV inputs (relative to the notebook's working directory)
opioid_deaths_to_load = Path('Data/opioid_death_counts.csv')
employment_data_to_load = Path('Data/combined_employment_data.csv')
education_data_to_load = Path('Data/combined_education_data.csv')
income_data_to_load = Path('Data/combined_income_data.csv')
family_size_data_to_load = Path('Data/combined_family_size_data.csv')


def _load_indexed_csv(csv_path):
    """Read one CSV into a DataFrame, using its first column as the index."""
    return pd.read_csv(csv_path, index_col=0)


# Load every source file into its own DataFrame
opioid_csv = _load_indexed_csv(opioid_deaths_to_load)
employment_csv = _load_indexed_csv(employment_data_to_load)
education_csv = _load_indexed_csv(education_data_to_load)
income_csv = _load_indexed_csv(income_data_to_load)
family_size_csv = _load_indexed_csv(family_size_data_to_load)

# Build the master frame: start from the employment data and left-join each
# remaining table on (State, Year), so every employment row is kept.
merge_keys = ['State', 'Year']
demographic_vs_opioid_df = employment_csv
for topic_df in (opioid_csv, education_csv, income_csv, family_size_csv):
    demographic_vs_opioid_df = demographic_vs_opioid_df.merge(
        topic_df, on=merge_keys, how='left'
    )

demographic_vs_opioid_df

FileNotFoundError: [Errno 2] No such file or directory: 'Data\\combined_employment_data.csv'

In [None]:
# set up masterfile
# Single source of truth for the master file layout: each top-level header maps
# to the source columns it groups, in output order. Both the column selection
# and the two-level header are derived from this mapping, so the second-level
# labels always match the source column names exactly (ASCII hyphen in
# 'Inflation-Adjusted', not a look-alike en dash).
column_groups = {
    'Year/State': [
        'Year',
        'State',
    ],
    'Opioid Data': [
        'Total Opioid Deaths',
    ],
    'Income Data': [
        'Median Household Income (in Inflation-Adjusted Dollars)',
    ],
    'Employment Data': [
        'Total in Labor Force',
        'Total Unemployed in Civilian Labor Force',
    ],
    'Education Data': [
        'Total Surveyed',
        'No High School Degree',
        'High School Diploma or GED',
        'College Degree or greater',
    ],
    'Family Size Data': [
        'Estimated Total Household Type(Including Living Alone)',
        'Estimated Total Family Households(Married Couples)',
        'Estimated Total Family Households(Male Householder No Spouse)',
        'Estimated Total Family Households(Female Householder No Spouse)',
        'Estimated Total Family Household(Nonfamily Households)',
        'Estimated Total Family Household(Householder Living Alone)',
    ],
}

# demographic columns grouped with headers: (group header, column name) pairs
columns = [
    (group, column_name)
    for group, column_names in column_groups.items()
    for column_name in column_names
]

# Select the summary columns; .copy() makes the result an independent frame so
# relabelling its columns below never touches demographic_vs_opioid_df.
masterfile_data_summary = demographic_vs_opioid_df[
    [column_name for _, column_name in columns]
].copy()

# implement multi index column headers
multi_index = pd.MultiIndex.from_tuples(columns)
masterfile_data_summary.columns = multi_index

# csv output
masterfile_data_summary.to_csv('Data/masterfile_data.csv')

masterfile_data_summary

In [None]:
# Treat Year as a string label and order the rows chronologically.
demographic_vs_opioid_df['Year'] = demographic_vs_opioid_df['Year'].astype(str)
demographic_vs_opioid_df = demographic_vs_opioid_df.sort_values('Year')

# Mean of 'Total Opioid Deaths' over all rows of each year (the "national average" line)
national_average_per_year = (
    demographic_vs_opioid_df
    .groupby('Year')['Total Opioid Deaths']
    .mean()
)

# The five states with the largest summed death counts across all years
deaths_by_state = demographic_vs_opioid_df.groupby('State')['Total Opioid Deaths'].sum()
top_5_states = deaths_by_state.nlargest(5).index

# Rows belonging to those five states only
in_top_5 = demographic_vs_opioid_df['State'].isin(top_5_states)
top_5_data = demographic_vs_opioid_df[in_top_5]

fig, ax = plt.subplots(figsize=(12, 8))

# One line per top-5 state, drawn in ranking order so the legend follows it
for state in top_5_states:
    state_data = top_5_data.loc[top_5_data['State'] == state]
    ax.plot(
        state_data['Year'],
        state_data['Total Opioid Deaths'],
        marker='o',
        label=state,
    )

# Dashed reference line for the per-year average
ax.plot(
    national_average_per_year.index,
    national_average_per_year.values,
    color='blue',
    linestyle='--',
    linewidth=2,
    marker='o',
    label='National Average',
)

ax.set_xlabel('Year')
ax.set_ylabel('Total Opioid Deaths')
ax.set_title('Top 5 States with Highest Opioid Death Rates Over 5 Years')
ax.legend()

plt.show()

In [None]:
import import_ipynb
import Unemployment_Notebook

In [None]:
import import_ipynb
import Education_Notebook

In [None]:
import import_ipynb
import Family_Size_Notebook

In [None]:
import import_ipynb
import Income_Notebook