## 2: Population Characteristics (Age 25+)

### Total Deaths 2005-2015

In [2]:
import pandas as pd
# Count the rows in each yearly mortality file, reporting every year as it is
# read and the combined figure for the whole period once all files are done.
STUDY_YEARS = range(2005, 2016)  # 2005 through 2015 inclusive

total_count = 0
for year in STUDY_YEARS:
    df = pd.read_csv(f"{year}_data.csv", low_memory=False)
    deaths_this_year = len(df)
    print(f"{deaths_this_year} mortalities in {year}")
    total_count += deaths_this_year

print(f"The total number of mortalities from 2005 to 2015 is {total_count}")

2452506 mortalities in 2005
2430725 mortalities in 2006
2428343 mortalities in 2007
2476811 mortalities in 2008
2441219 mortalities in 2009
2472542 mortalities in 2010
2519842 mortalities in 2011
2547864 mortalities in 2012
2601452 mortalities in 2013
2631171 mortalities in 2014
2718198 mortalities in 2015
The total number of mortalities from 2005 to 2015 is 27720673


### Total Heart Disease Deaths 2005-2015

In [66]:
import pandas as pd
# All-cause death count for 2005-2015; equals the total printed in the
# "Total Deaths 2005-2015" section of this notebook.
ALL_CAUSE_DEATHS = 27720673

df = pd.read_csv("HD_mortality_final.csv", low_memory=False)
hd_deaths = len(df)
hd_share = round((hd_deaths / ALL_CAUSE_DEATHS) * 100, 2)
print(f"The total number of Heart Disease mortalities from 2005 to 2015 is {hd_deaths} or {hd_share}% of total deaths")

The total number of Heart Disease mortalities from 2005 to 2015 is 12364124 or 44.6% of total deaths


### Total Substance Use Deaths 2005-2015

In [2]:
import pandas as pd
# All-cause death count for 2005-2015; equals the total printed in the
# "Total Deaths 2005-2015" section of this notebook.
ALL_CAUSE_DEATHS = 27720673

df = pd.read_csv("SU_mortality_final.csv", low_memory=False)
su_deaths = len(df)
su_share = round((su_deaths / ALL_CAUSE_DEATHS) * 100, 2)
print(f"The total number of Substance Use mortalities from 2005 to 2015 is {su_deaths} or {su_share}% of total deaths")

The total number of Substance Use mortalities from 2005 to 2015 is 978494 or 3.53% of total deaths


### Number of HD deaths w/ SU listed as a concomitant cause of death

In [1]:
import pandas as pd
# Denominator for the "% of total HD deaths" figure.
# NOTE(review): this hard-coded value differs from the 12364124 rows reported
# for HD_mortality_final.csv in the "Total Heart Disease Deaths" section --
# confirm which heart-disease total is intended before citing the percentage.
TOTAL_HD_DEATHS = 6782558

df = pd.read_csv("SU_HD_mortality_final.csv", low_memory=False)
su_hd_deaths = len(df)
su_hd_share = round((su_hd_deaths / TOTAL_HD_DEATHS) * 100, 2)
print(f"The total number of SU related HD mortalities from 2005 to 2015 is {su_hd_deaths} or {su_hd_share}% of total HD deaths")

The total number of SU related HD mortalities from 2005 to 2015 is 270578 or 3.99% of total HD deaths


### SU+HD stratified by Sex

In [3]:
import pandas as pd
df = pd.read_csv("SU_HD_mortality_final.csv", low_memory=False)

# Tally deaths by the value of the "sex" column ("F" vs. "M").
counts = df["sex"].value_counts()
f = counts["F"]
m = counts["M"]
total = len(df)

print(f"There were {m} male SUHD deaths accounting for {round((m / total) * 100, 2)}% of total SUHD deaths")
# This line reports the "F" count, so it is labelled "female" (it previously
# repeated the "male" label).
print(f"There were {f} female SUHD deaths accounting for {round((f / total) * 100, 2)}% of total SUHD deaths")

There were 205842 male SUHD deaths accounting for 76.07% of total SUHD deaths
There were 64736 female SUHD deaths accounting for 23.93% of total SUHD deaths


### SU+HD stratified by Race

In [8]:
import pandas as pd
df = pd.read_csv("SU_HD_mortality_final.csv", low_memory=False)
counts = df["race_recode_5"].value_counts()

# Look up every race_recode_5 code (1-4) first, so a missing code fails
# before anything is printed.
white, black, american_indian, asian_pacific_islander = (
    counts[code] for code in (1, 2, 3, 4)
)

n_deaths = len(df)
race_rows = (
    ("White", white),
    ("Black", black),
    ("American Indian", american_indian),
    ("Asian or Pacific Islander", asian_pacific_islander),
)
for label, n_group in race_rows:
    print(f"{label} = {round((n_group / n_deaths) * 100, 2)}% of total SUHD deaths")

White = 81.58% of total SUHD deaths
Black = 15.37% of total SUHD deaths
American Indian = 1.82% of total SUHD deaths
Asian or Pacific Islander = 1.22% of total SUHD deaths


### SU+HD stratified by Age

In [9]:
import pandas as pd
df = pd.read_csv("SU_HD_mortality_final.csv", low_memory=False)
counts = df["age_recode_custom"].value_counts()
total = len(df)

# Age groups: 0-24, 25-39, 40-54, 55-69, 70-84, 85+
# Only codes 1-5 (25-39 through 85+) are reported here.
AGE_BANDS = {1: "25-39", 2: "40-54", 3: "55-69", 4: "70-84", 5: "85+"}

# Resolve every band's count before printing, so a missing code fails early.
band_counts = [(label, counts[code]) for code, label in AGE_BANDS.items()]

for label, n_band in band_counts:
    print(f"Ages {label} accounted for {round((n_band / total) * 100, 2)}% of total SU+HD deaths")

Ages 25-39 accounted for 10.6% of total SU+HD deaths
Ages 40-54 accounted for 39.24% of total SU+HD deaths
Ages 55-69 accounted for 37.87% of total SU+HD deaths
Ages 70-84 accounted for 10.72% of total SU+HD deaths
Ages 85+ accounted for 1.57% of total SU+HD deaths


### SU+HD stratified by substance **

In [7]:
import pandas as pd

df = pd.read_csv('SU_HD_mortality_final.csv', low_memory=False)

# Substance category -> list of code prefixes assigned to that category.
categories = {
    'Opioids': [
        'F11', 'R781',
        'T400', 'T401', 'T402', 'T403', 'T404', 'T406',
        'X62', 'Y12',
    ],
    'Cannabis': ['F12', 'T407'],
    'Cocaine': ['F14', 'R782', 'T405'],
    'Sedatives/Hypnotics': [
        'F13',
        'T423', 'T424', 'T426', 'T427',
        'X61', 'Y11',
    ],
    'Alcohol': [
        'E244', 'F10', 'G312', 'G621', 'G721', 'I426',
        'K70', 'K852', 'K860', 'R780', 'T51', 'X65', 'Y15',
    ],
    'Stimulants': ['F15', 'T436'],
}

# Columns to scan: the main code column followed by record_condition_1..20.
cond_cols = [
    'icd_code_10th_revision',
    *(f'record_condition_{n}' for n in range(1, 21)),
]

def row_has_prefix(row, prefixes, columns=None):
    """Report whether any scanned field of ``row`` starts with one of ``prefixes``.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Record indexable by column name.
    prefixes : iterable of str
        Code prefixes to look for.
    columns : iterable of str, optional
        Column names to scan. When omitted, the notebook-level ``cond_cols``
        list is used, which keeps existing two-argument calls working.

    Returns
    -------
    bool
        True as soon as one scanned value (stringified and stripped of
        surrounding whitespace) starts with any prefix; False otherwise.
        Values whose string form is ``'nan'`` are never counted as a match.
    """
    if columns is None:
        columns = cond_cols  # fall back to the notebook-level column list
    # str.startswith accepts a tuple and tests all prefixes in one call.
    prefix_tuple = tuple(prefixes)
    for col in columns:
        val = str(row[col]).strip()
        if val != 'nan' and val.startswith(prefix_tuple):
            return True
    return False

total_deaths = len(df)

# Normalise the scanned columns once (stringified, whitespace-stripped) rather
# than re-converting every cell for every category in a row-wise apply.
present = df[cond_cols].notna()  # missing cells must never count as a match
codes = df[cond_cols].astype(str).apply(lambda col: col.str.strip())

for cat, prefixes in categories.items():
    # A death belongs to the category if ANY scanned column starts with ANY
    # of the category's prefixes.
    mask = pd.Series(False, index=df.index)
    for prefix in prefixes:
        starts = codes.apply(lambda col: col.str.startswith(prefix, na=False))
        mask |= (starts & present).any(axis=1)
    percent = mask.sum() / total_deaths * 100
    print(f"{cat}: {percent:.1f}%")


Opioids: 15.5%
Cannabis: 0.4%
Cocaine: 10.3%
Sedatives/Hypnotics: 4.8%
Alcohol: 61.6%
Stimulants: 5.4%
