# Dependencies and Setup

In [1]:
import pandas as pd
from datetime import datetime
import statsmodels.api as sm

In [3]:
# Load the raw accidental drug-related deaths records from the CSV export
drug_related_deaths_df = pd.read_csv(
    filepath_or_buffer='Accidental_Drug_Related_Deaths_2012-2018 (2).csv'
)

In [4]:
# Keep only the columns used by this analysis: record details first,
# followed by the per-substance flag columns (column order is preserved).
record_columns = [
    'ID', 'Date', 'Age', 'Sex', 'Race',
    'DeathCity', 'DeathCityGeo', 'COD',
]
substance_columns = [
    'Heroin', 'Cocaine', 'Fentanyl', 'FentanylAnalogue',
    'Oxycodone', 'Oxymorphone', 'Ethanol', 'Hydrocodone',
    'Benzodiazepine', 'Methadone', 'Amphet', 'Tramad',
    'Morphine_NotHeroin', 'Hydromorphone', 'Other',
    'OpiateNOS', 'AnyOpioid',
]
narrowed_df = drug_related_deaths_df.loc[:, record_columns + substance_columns]

In [5]:
# Discard a record only when Age, Sex and Race are ALL missing (how='all');
# records with at least one of the three filled in are kept.
narrowed_df = narrowed_df.dropna(
    axis='index',
    how='all',
    subset=['Age', 'Sex', 'Race'],
)

In [6]:
# Replace missing values in the substance flag columns with 'N';
# all other columns are left untouched.
flag_fill_values = dict.fromkeys(
    [
        'Heroin', 'Cocaine', 'Fentanyl', 'FentanylAnalogue',
        'Oxycodone', 'Oxymorphone', 'Ethanol', 'Hydrocodone',
        'Benzodiazepine', 'Methadone', 'Amphet', 'Tramad',
        'Morphine_NotHeroin', 'Hydromorphone', 'Other',
        'OpiateNOS', 'AnyOpioid',
    ],
    'N',
)
narrowed_df = narrowed_df.fillna(value=flag_fill_values)

In [7]:
# Parse the 'Date' column into pandas datetime values
narrowed_df = narrowed_df.assign(Date=pd.to_datetime(narrowed_df['Date']))

In [8]:
# Count deaths per (year, month).
#
# Records without a date are removed before grouping. The grouped counts are
# unchanged by this (groupby skips missing keys anyway), but the year/month keys
# stay integers; the previous output showed float keys such as 2012.0 / 1.0,
# presumably caused by missing dates.
# No `format=` is passed: 'Date' is already parsed by the cell above, so
# pd.to_datetime is only a safeguard here.
#
# NOTE: this rebinds narrowed_df to the aggregated table (index levels
# 'Year' and 'Month', single column 'ID' holding the count), so the cell cannot
# be re-run without first re-running the cells above.
dated_records = narrowed_df.dropna(subset=['Date'])
death_dates = pd.to_datetime(dated_records['Date'])
narrowed_df = dated_records.groupby(
    by=[
        death_dates.dt.year.rename('Year'),
        death_dates.dt.month.rename('Month'),
    ]
).agg({'ID': 'count'})
narrowed_df

Unnamed: 0_level_0,Unnamed: 1_level_0,ID
Date,Date,Unnamed: 2_level_1
2012.0,1.0,31
2012.0,2.0,27
2012.0,3.0,24
2012.0,4.0,30
2012.0,5.0,28
...,...,...
2018.0,8.0,73
2018.0,9.0,92
2018.0,10.0,83
2018.0,11.0,89


In [None]:
# narrowed_df holds one row per (year, month) index pair with the death count in
# 'ID'; it has no 'date' or 'count' columns. Build them explicitly so the series
# can be sorted chronologically and plotted.
# Index levels are read by position and cast to int so this works whatever the
# levels are named and whether the keys are stored as floats or ints.
period_parts = pd.DataFrame({
    'year': narrowed_df.index.get_level_values(0).astype(int),
    'month': narrowed_df.index.get_level_values(1).astype(int),
    'day': 1,  # represent each month by its first day
})
df = pd.DataFrame({
    'date': pd.to_datetime(period_parts),
    'count': narrowed_df['ID'].to_numpy(),
}).sort_values('date', ascending=True)

# pandas plotting is used (as elsewhere in this notebook) because pyplot is not
# imported here; rot=90 gives vertical tick labels.
df.plot(x='date', y='count', rot=90)

In [None]:
# Inclusive date window to plot
start_date = datetime(2012, 1, 1)
end_date = datetime(2018, 12, 1)

# narrowed_df is indexed by (year, month) numbers, which cannot be compared with
# datetime bounds. Rebuild a month-start timestamp for every row and use that as
# the index of a copy, leaving narrowed_df itself untouched.
month_starts = pd.to_datetime(pd.DataFrame({
    'year': narrowed_df.index.get_level_values(0).astype(int),
    'month': narrowed_df.index.get_level_values(1).astype(int),
    'day': 1,
}))
monthly_counts = narrowed_df.copy()
monthly_counts.index = pd.DatetimeIndex(month_starts, name='Date')

in_range = (monthly_counts.index >= start_date) & (monthly_counts.index <= end_date)
monthly_counts[in_range].plot(grid=True)

In [None]:
# Accidental deaths per calendar month, all years combined.
#
# narrowed_df is already aggregated to one row per (year, month) with the count
# in 'ID' and no longer has a 'Date' column, so the per-month totals are obtained
# by summing those counts over the month level (second index level).
# narrowed_df is not modified, which keeps this cell safe to re-run.
per_month_totals = narrowed_df.groupby(level=1)['ID'].sum()
per_month_totals.index = per_month_totals.index.astype(int).rename('Month')

deaths_per_month = per_month_totals.rename('Deaths Per Month').reset_index()
deaths_per_month.head()


In [None]:
%matplotlib inline
deaths_per_month.plot(grid='on')