In [169]:
#set up dependencies
import pandas as pd
import numpy as np

#import datafile
naloxone = pd.read_csv("original data pulls/overdose_network_data.csv")

In [170]:
#keep only columns we want to look at for now
naloxone_summary = naloxone[["Incident Date", "Incident County Name", "Naloxone Administered",
                            "Survive", "Susp OD Drug Desc"]]
#rename so we have easier handle to work with for syntax
naloxone_summary = naloxone_summary.rename(columns={"Incident Date": "date",
                                                    "Incident County Name": "County",
                                                    "Naloxone Administered": "naloxone",
                                                    "Susp OD Drug Desc": "od_drug_name"})
naloxone_summary.head()

Unnamed: 0,date,County,naloxone,Survive,od_drug_name
0,1/4/18,Delaware,Y,N,COCAINE/CRACK
1,1/4/18,Delaware,Y,N,COCAINE/CRACK
2,1/4/18,Delaware,Y,N,HEROIN
3,1/4/18,Delaware,Y,N,HEROIN
4,1/26/18,Chester,N,N,HEROIN


In [171]:
#next, we want to filter by date so we can tally by month
#break down date into month and year
naloxone_summary["year"]=[d.split('/')[2] for d in naloxone_summary.date]
naloxone_summary["month"]=[d.split('/')[0] for d in naloxone_summary.date]

#confirm split and new columns worked
naloxone_summary.head()

Unnamed: 0,date,County,naloxone,Survive,od_drug_name,year,month
0,1/4/18,Delaware,Y,N,COCAINE/CRACK,18,1
1,1/4/18,Delaware,Y,N,COCAINE/CRACK,18,1
2,1/4/18,Delaware,Y,N,HEROIN,18,1
3,1/4/18,Delaware,Y,N,HEROIN,18,1
4,1/26/18,Chester,N,N,HEROIN,18,1


In [172]:
#find how many years are in data
naloxone_summary["year"].unique()

array(['18', '19'], dtype=object)

In [173]:
#drop rows so we only have data for 2018
indexYears = naloxone_summary[naloxone_summary['year']=="18"].index
naloxone_summary.drop(indexYears, inplace=True)
naloxone_summary


Unnamed: 0,date,County,naloxone,Survive,od_drug_name,year,month
7172,1/1/19,Chester,N,N,HEROIN,19,1
7173,1/1/19,Allegheny,N,Y,FENTANYL,19,1
7174,1/1/19,Allegheny,N,Y,FENTANYL ANALOG/OTHER SYNTHETIC OPIOID,19,1
7175,1/1/19,Allegheny,N,Y,HEROIN,19,1
7176,1/1/19,Allegheny,N,Y,HEROIN,19,1
7177,1/1/19,Allegheny,N,Y,UNKNOWN,19,1
7178,1/1/19,Montgomery,N,N,COCAINE/CRACK,19,1
7179,1/1/19,Montgomery,N,N,FENTANYL,19,1
7180,1/1/19,Montgomery,N,N,HEROIN,19,1
7181,1/1/19,Lancaster,Y,Y,FENTANYL,19,1


In [174]:
#map yes values to 1 and no values to 0
naloxone_summary["naloxone"]= naloxone_summary["naloxone"].map({'N':0, 'Y':1})
naloxone_summary["Survive"]=naloxone_summary["Survive"].map({'N':0, 'Y':1})
naloxone_summary.head()



Unnamed: 0,date,County,naloxone,Survive,od_drug_name,year,month
7172,1/1/19,Chester,0,0.0,HEROIN,19,1
7173,1/1/19,Allegheny,0,1.0,FENTANYL,19,1
7174,1/1/19,Allegheny,0,1.0,FENTANYL ANALOG/OTHER SYNTHETIC OPIOID,19,1
7175,1/1/19,Allegheny,0,1.0,HEROIN,19,1
7176,1/1/19,Allegheny,0,1.0,HEROIN,19,1


In [175]:
#loop through data frame to build monthly counts/tally
months = np.arange(2,9,1)
months = months.astype(str)

a = 1
b = 2

#do January outside the loop so there is a df to merge future loop iterations to
months2019a = naloxone_summary.loc[naloxone_summary['month']=="1"]
months2019b = months2019a.groupby("County").naloxone.sum()
months2019 = pd.DataFrame(months2019b)
months2019 = months2019.reset_index()

months2019

Unnamed: 0,County,naloxone
0,Adams,1
1,Allegheny,37
2,Armstrong,1
3,Beaver,3
4,Berks,7
5,Blair,3
6,Bucks,33
7,Butler,4
8,Cambria,8
9,Carbon,3


In [176]:
#loop through for remaining months
for month in months:
    naloxone_month = naloxone_summary.loc[naloxone_summary["month"]==month]
    naloxone_month = naloxone_month.groupby("County").naloxone.sum()
    naloxone_month = pd.DataFrame(naloxone_month)
    naloxone_month = naloxone_month.reset_index()
    months2019 = pd.merge(months2019, naloxone_month, on="County", how="outer")
    months2019 = months2019.rename(columns={"naloxone_x": a, "naloxone_y":b })
    a+=1
    b+=1

months2019 = months2019.rename(columns={1: "Jan19", 2: "Feb19", 3: "Mar19", 4: "Apr19", 
                                        5: "May19", 6: "Jun19", 7: "Jul19", 8: "Aug19"})
    
months2019["Total"] = months2019.sum(axis=1)
months2019

months2019.to_csv("clean data/naloxone2019.csv")

naloxone_month_totals = months2019.sum(axis=0)
naloxone_month_totals = naloxone_month_totals.drop(labels=["County"])
naloxone_month_totals

Jan19     324
Feb19     307
Mar19     365
Apr19     379
May19     367
Jun19     360
Jul19     395
Aug19     158
Total    2655
dtype: object

In [177]:
#now for survival data
survival19 = naloxone_summary.loc[naloxone_summary['naloxone']==1]
survival19.head()

Unnamed: 0,date,County,naloxone,Survive,od_drug_name,year,month
7181,1/1/19,Lancaster,1,1.0,FENTANYL,19,1
7182,1/1/19,Lancaster,1,1.0,HEROIN,19,1
7184,1/1/19,Allegheny,1,1.0,HEROIN,19,1
7187,1/1/19,Beaver,1,1.0,HEROIN,19,1
7188,1/1/19,Beaver,1,1.0,UNKNOWN,19,1


In [178]:
#loop through data frame to build monthly counts/tally
months = np.arange(2,9,1)
months = months.astype(str)

a = 1
b = 2

#do January outside the loop so there is a df to merge future loop iterations to
survive2019a = survival19.loc[survival19['month']=="1"]
survive2019b = survive2019a.groupby("County").Survive.sum()
survive2019 = pd.DataFrame(survive2019b)
survive2019 = survive2019.reset_index()
survive2019 = survive2019.rename(columns={"Survive": "Jan19"})
survive2019.head()

Unnamed: 0,County,Jan19
0,Adams,1.0
1,Allegheny,36.0
2,Armstrong,1.0
3,Beaver,3.0
4,Berks,5.0


In [179]:
#do february now
survive2019a = survival19.loc[survival19['month']=="2"]
survive2019b = survive2019a.groupby("County").Survive.sum()
a = pd.DataFrame(survive2019b)
a = a.reset_index()
a = a.rename(columns={"Survive": "Feb19"})
a.head()

Unnamed: 0,County,Feb19
0,Adams,3.0
1,Allegheny,38.0
2,Armstrong,6.0
3,Beaver,0.0
4,Bedford,1.0


In [180]:
#merge jan and feb
survival = pd.merge(survive2019, a, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19
0,Adams,1.0,3.0
1,Allegheny,36.0,38.0
2,Armstrong,1.0,6.0
3,Beaver,3.0,0.0
4,Berks,5.0,4.0


In [181]:
#march
survive2019a = survival19.loc[survival19['month']=="3"]
survive2019b = survive2019a.groupby("County").Survive.sum()
b = pd.DataFrame(survive2019b)
b = b.reset_index()
b = b.rename(columns={"Survive": "Mar19"})
b.head()

Unnamed: 0,County,Mar19
0,Adams,2.0
1,Allegheny,34.0
2,Armstrong,3.0
3,Beaver,5.0
4,Berks,6.0


In [182]:
#merge together
survival = pd.merge(survival, b, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19,Mar19
0,Adams,1.0,3.0,2.0
1,Allegheny,36.0,38.0,34.0
2,Armstrong,1.0,6.0,3.0
3,Beaver,3.0,0.0,5.0
4,Berks,5.0,4.0,6.0


In [183]:
#april
survive2019a = survival19.loc[survival19['month']=="4"]
survive2019b = survive2019a.groupby("County").Survive.sum()
c = pd.DataFrame(survive2019b)
c = c.reset_index()
c = c.rename(columns={"Survive": "Apr19"})
c.head()

#merge together
survival = pd.merge(survival, c, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19,Mar19,Apr19
0,Adams,1.0,3.0,2.0,6.0
1,Allegheny,36.0,38.0,34.0,30.0
2,Armstrong,1.0,6.0,3.0,2.0
3,Beaver,3.0,0.0,5.0,2.0
4,Berks,5.0,4.0,6.0,9.0


In [184]:
#may
survive2019a = survival19.loc[survival19['month']=="5"]
survive2019b = survive2019a.groupby("County").Survive.sum()
d = pd.DataFrame(survive2019b)
d = d.reset_index()
d = d.rename(columns={"Survive": "May19"})
d.head()

#merge together
survival = pd.merge(survival, d, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19,Mar19,Apr19,May19
0,Adams,1.0,3.0,2.0,6.0,1.0
1,Allegheny,36.0,38.0,34.0,30.0,32.0
2,Armstrong,1.0,6.0,3.0,2.0,1.0
3,Beaver,3.0,0.0,5.0,2.0,1.0
4,Berks,5.0,4.0,6.0,9.0,10.0


In [185]:
#june
survive2019a = survival19.loc[survival19['month']=="6"]
survive2019b = survive2019a.groupby("County").Survive.sum()
e = pd.DataFrame(survive2019b)
e = e.reset_index()
e = e.rename(columns={"Survive": "Jun19"})
e.head()

#merge together
survival = pd.merge(survival, e, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19,Mar19,Apr19,May19,Jun19
0,Adams,1.0,3.0,2.0,6.0,1.0,2.0
1,Allegheny,36.0,38.0,34.0,30.0,32.0,43.0
2,Armstrong,1.0,6.0,3.0,2.0,1.0,1.0
3,Beaver,3.0,0.0,5.0,2.0,1.0,1.0
4,Berks,5.0,4.0,6.0,9.0,10.0,5.0


In [186]:
#july
survive2019a = survival19.loc[survival19['month']=="7"]
survive2019b = survive2019a.groupby("County").Survive.sum()
f = pd.DataFrame(survive2019b)
f = f.reset_index()
f = f.rename(columns={"Survive": "Jul19"})
f.head()

#merge together
survival = pd.merge(survival, f, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19,Mar19,Apr19,May19,Jun19,Jul19
0,Adams,1.0,3.0,2.0,6.0,1.0,2.0,
1,Allegheny,36.0,38.0,34.0,30.0,32.0,43.0,57.0
2,Armstrong,1.0,6.0,3.0,2.0,1.0,1.0,2.0
3,Beaver,3.0,0.0,5.0,2.0,1.0,1.0,
4,Berks,5.0,4.0,6.0,9.0,10.0,5.0,6.0


In [187]:
#august
survive2019a = survival19.loc[survival19['month']=="8"]
survive2019b = survive2019a.groupby("County").Survive.sum()
g = pd.DataFrame(survive2019b)
g = g.reset_index()
g = g.rename(columns={"Survive": "Aug19"})
g.head()

#merge together
survival = pd.merge(survival, g, on="County", how = "outer" )
survival.head()

Unnamed: 0,County,Jan19,Feb19,Mar19,Apr19,May19,Jun19,Jul19,Aug19
0,Adams,1.0,3.0,2.0,6.0,1.0,2.0,,1.0
1,Allegheny,36.0,38.0,34.0,30.0,32.0,43.0,57.0,17.0
2,Armstrong,1.0,6.0,3.0,2.0,1.0,1.0,2.0,
3,Beaver,3.0,0.0,5.0,2.0,1.0,1.0,,1.0
4,Berks,5.0,4.0,6.0,9.0,10.0,5.0,6.0,


In [188]:
survival.to_csv("clean data/survival2019_bymonth.csv")