# 2024 Dubtech Datathon
### Collaborative project focused on Opioid risk machine learning


In [1]:
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import os
import scipy
import numpy as np

%matplotlib inline

In [8]:
# test read source data
source_file = os.path.join(os.getcwd(),'Drug_overdose_death_rates__by_drug_type__sex__age__race__and_Hispanic_origin__United_States_20240518.csv')
opioid_df = pd.read_csv(source_file)

opioid_df.head()

Unnamed: 0,INDICATOR,PANEL,PANEL_NUM,UNIT,UNIT_NUM,STUB_NAME,STUB_NAME_NUM,STUB_LABEL,STUB_LABEL_NUM,YEAR,YEAR_NUM,AGE,AGE_NUM,ESTIMATE,FLAG
0,Drug overdose death rates,All drug overdose deaths,0,"Deaths per 100,000 resident population, age-ad...",1,Total,0,All persons,0.1,1999,1,All ages,1.1,6.1,
1,Drug overdose death rates,All drug overdose deaths,0,"Deaths per 100,000 resident population, age-ad...",1,Total,0,All persons,0.1,2000,2,All ages,1.1,6.2,
2,Drug overdose death rates,All drug overdose deaths,0,"Deaths per 100,000 resident population, age-ad...",1,Total,0,All persons,0.1,2001,3,All ages,1.1,6.8,
3,Drug overdose death rates,All drug overdose deaths,0,"Deaths per 100,000 resident population, age-ad...",1,Total,0,All persons,0.1,2002,4,All ages,1.1,8.2,
4,Drug overdose death rates,All drug overdose deaths,0,"Deaths per 100,000 resident population, age-ad...",1,Total,0,All persons,0.1,2003,5,All ages,1.1,8.9,


In [9]:
# exploratory data analysis - descriptive analyses

opioid_df.describe()


Unnamed: 0,PANEL_NUM,UNIT_NUM,STUB_NAME_NUM,STUB_LABEL_NUM,YEAR,YEAR_NUM,AGE_NUM,ESTIMATE
count,6228.0,6228.0,6228.0,6228.0,6228.0,6228.0,6228.0,5117.0
mean,2.5,1.578035,3.028902,3.383006,2008.66474,10.66474,1.354913,4.743443
std,1.707962,0.493913,1.447036,1.526819,5.849512,5.849512,0.301459,6.424471
min,0.0,1.0,0.0,0.1,1999.0,1.0,1.1,0.0
25%,1.0,1.0,2.0,2.1,2004.0,6.0,1.1,0.8
50%,2.5,2.0,3.0,3.22,2009.0,11.0,1.2,2.1
75%,4.0,2.0,4.0,4.6,2014.0,16.0,1.6,6.0
max,5.0,2.0,5.0,5.93,2018.0,20.0,1.91,54.3


In [13]:
# check on FLAG field
opioid_df.FLAG.unique()

array([nan, '*'], dtype=object)

In [15]:
opioid_df.groupby('FLAG').count()

Unnamed: 0_level_0,INDICATOR,PANEL,PANEL_NUM,UNIT,UNIT_NUM,STUB_NAME,STUB_NAME_NUM,STUB_LABEL,STUB_LABEL_NUM,YEAR,YEAR_NUM,AGE,AGE_NUM,ESTIMATE
FLAG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
*,1111,1111,1111,1111,1111,1111,1111,1111,1111,1111,1111,1111,1111,0


In [14]:
opioid_df.query('FLAG == "*"')

Unnamed: 0,INDICATOR,PANEL,PANEL_NUM,UNIT,UNIT_NUM,STUB_NAME,STUB_NAME_NUM,STUB_LABEL,STUB_LABEL_NUM,YEAR,YEAR_NUM,AGE,AGE_NUM,ESTIMATE,FLAG
1104,Drug overdose death rates,Drug overdose deaths involving any opioid,1,"Deaths per 100,000 resident population, age-ad...",1,Sex and race,4,Male: Asian or Pacific Islander,4.40,1999,1,All ages,1.10,,*
1184,Drug overdose death rates,Drug overdose deaths involving any opioid,1,"Deaths per 100,000 resident population, age-ad...",1,Sex and race,4,Female: Asian or Pacific Islander,4.80,1999,1,All ages,1.10,,*
1185,Drug overdose death rates,Drug overdose deaths involving any opioid,1,"Deaths per 100,000 resident population, age-ad...",1,Sex and race,4,Female: Asian or Pacific Islander,4.80,2000,2,All ages,1.10,,*
1186,Drug overdose death rates,Drug overdose deaths involving any opioid,1,"Deaths per 100,000 resident population, age-ad...",1,Sex and race,4,Female: Asian or Pacific Islander,4.80,2001,3,All ages,1.10,,*
1187,Drug overdose death rates,Drug overdose deaths involving any opioid,1,"Deaths per 100,000 resident population, age-ad...",1,Sex and race,4,Female: Asian or Pacific Islander,4.80,2002,4,All ages,1.10,,*
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6217,Drug overdose death rates,Drug overdose deaths involving heroin,5,"Deaths per 100,000 resident population, crude",2,Sex and age,3,Male: 75-84 years,3.18,2018,20,75-84 years,1.90,,*
6218,Drug overdose death rates,Drug overdose deaths involving heroin,5,"Deaths per 100,000 resident population, crude",2,Sex and age,3,Male: 85 years and over,3.19,2018,20,85 years and over,1.91,,*
6219,Drug overdose death rates,Drug overdose deaths involving heroin,5,"Deaths per 100,000 resident population, crude",2,Sex and age,3,Female: Under 15 years,3.21,2018,20,Under 15 years,1.20,,*
6226,Drug overdose death rates,Drug overdose deaths involving heroin,5,"Deaths per 100,000 resident population, crude",2,Sex and age,3,Female: 75-84 years,3.28,2018,20,75-84 years,1.90,,*


In [22]:
opioid_df.apply(lambda x: list(x.unique()))

INDICATOR                               [Drug overdose death rates]
PANEL             [All drug overdose deaths, Drug overdose death...
PANEL_NUM                                        [0, 1, 2, 3, 4, 5]
UNIT              [Deaths per 100,000 resident population, age-a...
UNIT_NUM                                                     [1, 2]
STUB_NAME         [Total, Sex, Sex and race, Sex and race and Hi...
STUB_NAME_NUM                                    [0, 2, 4, 5, 1, 3]
STUB_LABEL        [All persons, Male, Female, Male: White, Male:...
STUB_LABEL_NUM    [0.1, 2.1, 2.2, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, ...
YEAR              [1999, 2000, 2001, 2002, 2003, 2004, 2005, 200...
YEAR_NUM          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
AGE               [All ages, Under 15 years, 15-24 years, 25-34 ...
AGE_NUM           [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, ...
ESTIMATE          [6.1, 6.2, 6.8, 8.2, 8.9, 9.4, 10.1, 11.5, 11....
FLAG                                            

In [28]:
pd.pivot_table(index='INDICATOR', data=opioid_df)

Unnamed: 0_level_0,AGE_NUM,ESTIMATE,PANEL_NUM,STUB_LABEL_NUM,STUB_NAME_NUM,UNIT_NUM,YEAR,YEAR_NUM
INDICATOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Drug overdose death rates,1.354913,4.743443,2.5,3.383006,3.028902,1.578035,2008.66474,10.66474
