In [12]:
%matplotlib inline

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='white')

import utils
from utils import decorate
from thinkstats2 import Pmf, Cdf

In [13]:
# Load the cleaned NSDUH survey extract from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load a .pkl
# that was produced by this project's own cleaning step.
df = pd.read_pickle('cleaned_nsduh.pkl')
# Preview the first three rows to sanity-check the columns and labels.
df.head(3)

Unnamed: 0,SEX,AGE,PRES_PAINKILLER_USE,ANY_PAINKILLER_LIFETIME,OXYCONTIN_USE,OXYCONTIN_MISUSED,INCOME,FAMILY_INCOME,EDUCATION,CAIDCHIP_HI,PRIVATE_HI,ANY_HI,REC_PAINKILLER_USE,REC_PAINKILLER_MISUSE,HEALTH_INSURANCE
0,Male,65+,Never used,Not used,Not used in past year,Did not misuse - past year,75k+,75k+,College Graduate,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Never,Never misused,Has Private HI
1,Male,35-49,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,30-40k,30-40k,College Graduate,Has Medicaid/CHIP,Doesn't have Private HI,With HI,Never,Never misused,Has Medicaid/CHIP
2,Male,35-49,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,50-75k,75k+,Some Coll/Assoc Degree,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Never,Never misused,Has Private HI


In [14]:
# Joint distribution of sex and prescription-painkiller use.
# normalize="all" divides every cell by the grand total; margins=True adds
# the "All" row/column of totals.
# NOTE(review): in the recorded output the "All" row does not match the
# column sums (Male 13.07 + Female 16.98 = 30.05 for "Used within year", yet
# "All" shows 46.27 there); the margin row looks reversed -- verify.
cs = pd.crosstab(
    index=df["SEX"],
    columns=df["PRES_PAINKILLER_USE"],
    rownames=['Sex'],
    colnames=['Used Prescription Painkillers'],
    margins=True,
    normalize="all",
)
# Show proportions as percentages rounded to two decimals.
cs.mul(100).round(2)

Used Prescription Painkillers,Used within year,Used in 12+ months,Never used,All
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,13.07,11.01,23.97,48.04
Female,16.98,12.67,22.31,51.96
All,46.27,23.68,30.05,100.0


In [15]:
# Joint distribution of health-insurance status and past-year OxyContin use,
# as shares of all respondents (normalize="all"); margins=True adds totals.
# The column axis is labelled for OxyContin because the variable tabulated is
# OXYCONTIN_USE, not the general prescription-painkiller variable.
# NOTE(review): in the recorded output the "All" row does not match the
# column sums (2.87 + 0.36 = 3.23 for "Used in past year", yet "All" shows
# 96.76 there); the margin row looks reversed -- verify.
cs = pd.crosstab(
    df.ANY_HI,
    df.OXYCONTIN_USE,
    margins=True,
    rownames=['Health Insurance'],
    colnames=['Used OxyContin'],
    normalize="all",
)
# Show proportions as percentages rounded to two decimals.
(cs*100).round(decimals=2)

Used Prescription Painkillers,Used in past year,Not used in past year,All
Health Insurance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
With HI,2.87,87.73,90.6
Without HI,0.36,9.03,9.4
All,96.76,3.24,100.0


In [16]:
# Prescription-painkiller use within each health-insurance category.
# normalize='index' makes every row sum to 1, so rows are comparable across
# insurance types; margins=True appends the overall "All" row.
# NOTE(review): in the recorded output every insurance row has 28-34% under
# "Used within year", but "All" shows 46.27 there, which cannot be a weighted
# average of those rows; the margin row looks reversed -- verify.
cs = pd.crosstab(
    index=df["HEALTH_INSURANCE"],
    columns=df["PRES_PAINKILLER_USE"],
    rownames=['Health Insurance'],
    colnames=['Used Prescription Painkillers'],
    margins=True,
    normalize='index',
)
# Show proportions as percentages rounded to two decimals.
cs.mul(100).round(2)

Used Prescription Painkillers,Used within year,Used in 12+ months,Never used
Health Insurance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Has Medicaid/CHIP,32.19,12.9,54.91
Has Private HI,29.0,28.66,42.34
Has any other HI,34.2,23.45,42.35
Without any HI,28.18,19.23,52.59
All,46.27,23.68,30.05


In [17]:

# Row-normalised crosstab of health-insurance type against
# prescription-painkiller use (each insurance row sums to 100%).
# NOTE(review): this cell repeats the preceding cell's code verbatim and
# produces the same table; consider removing one of the two.
cs = pd.crosstab(df["HEALTH_INSURANCE"], df["PRES_PAINKILLER_USE"], margins=True, rownames=['Health Insurance'], colnames=['Used Prescription Painkillers'], normalize='index')
# Show proportions as percentages rounded to two decimals.
(cs*100).round(decimals=2)

Used Prescription Painkillers,Used within year,Used in 12+ months,Never used
Health Insurance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Has Medicaid/CHIP,32.19,12.9,54.91
Has Private HI,29.0,28.66,42.34
Has any other HI,34.2,23.45,42.35
Without any HI,28.18,19.23,52.59
All,46.27,23.68,30.05


In [18]:
# Past-year OxyContin misuse within each health-insurance category.
# normalize='index' makes every row sum to 1; margins=True appends "All".
# The column axis is labelled for OxyContin because the variable tabulated is
# OXYCONTIN_MISUSED, not a general prescription-painkiller variable.
# NOTE(review): in the recorded output every insurance row has 0.5-1.5% under
# "Misused - past year", but "All" shows 99.25 there; the margin row looks
# reversed -- verify.
cs = pd.crosstab(
    df["HEALTH_INSURANCE"],
    df["OXYCONTIN_MISUSED"],
    margins=True,
    rownames=['Health Insurance'],
    colnames=['Misused OxyContin'],
    normalize='index',
)
# Show proportions as percentages rounded to two decimals.
(cs*100).round(decimals=2)

Misused Prescription Painkillers,Misused - past year,Did not misuse - past year
Health Insurance,Unnamed: 1_level_1,Unnamed: 2_level_1
Has Medicaid/CHIP,0.93,99.07
Has Private HI,0.59,99.41
Has any other HI,0.52,99.48
Without any HI,1.53,98.47
All,99.25,0.75


In [19]:
# Restrict to respondents whose PRES_PAINKILLER_USE is anything other than
# "Never used".
# NOTE(review): `!=` also keeps rows where PRES_PAINKILLER_USE is missing
# (NaN compares unequal to any string) -- confirm the column has no NaN.
ever_used = df[df.PRES_PAINKILLER_USE != "Never used"]
# Preview only the first rows instead of rendering the whole filtered frame.
ever_used.head(10)

Unnamed: 0,SEX,AGE,PRES_PAINKILLER_USE,ANY_PAINKILLER_LIFETIME,OXYCONTIN_USE,OXYCONTIN_MISUSED,INCOME,FAMILY_INCOME,EDUCATION,CAIDCHIP_HI,PRIVATE_HI,ANY_HI,REC_PAINKILLER_USE,REC_PAINKILLER_MISUSE,HEALTH_INSURANCE
1,Male,35-49,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,30-40k,30-40k,College Graduate,Has Medicaid/CHIP,Doesn't have Private HI,With HI,Never,Never misused,Has Medicaid/CHIP
2,Male,35-49,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,50-75k,75k+,Some Coll/Assoc Degree,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Never,Never misused,Has Private HI
3,Male,35-49,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,75k+,75k+,College Graduate,Doesn't have Medicaid/CHIP,Doesn't have Private HI,With HI,Never,Never misused,Has any other HI
8,Male,19,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,10k or less,10k or less,Less High School,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
13,Female,50-64,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,10-20k,30-40k,High School Grad,Doesn't have Medicaid/CHIP,Doesn't have Private HI,With HI,Never,Never misused,Has any other HI
18,Male,26-29,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,20-30k,40-50k,High School Grad,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Within more than 12 months,Misused before 12+ months,Has Private HI
19,Female,15,Used within year,Used (assumed),Not used in past year,Did not misuse - past year,10k or less,50-75k,12-17 year olds,Has Medicaid/CHIP,Doesn't have Private HI,With HI,Never,Never misused,Has Medicaid/CHIP
20,Male,22-23,Used within year,Used (assumed),Not used in past year,Did not misuse - past year,30-40k,40-50k,Less High School,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Never,Never misused,Has Private HI
21,Female,24-25,Used within year,Used (assumed),Used in past year,Did not misuse - past year,20-30k,20-30k,Some Coll/Assoc Degree,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Never,Never misused,Has Private HI
22,Female,17,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,10k or less,75k+,12-17 year olds,Doesn't have Medicaid/CHIP,Has Private HI,With HI,Never,Never misused,Has Private HI


In [20]:
# Past-year OxyContin misuse by health-insurance category, restricted to the
# `ever_used` subset (respondents not marked "Never used").
# normalize='index' makes every row sum to 1; margins=True appends "All".
# The column axis is labelled for OxyContin because the variable tabulated is
# OXYCONTIN_MISUSED, not a general prescription-painkiller variable.
# NOTE(review): in the recorded output every insurance row has 0.9-3.3% under
# "Misused - past year", but "All" shows 98.6 there; the margin row looks
# reversed -- verify.
cs = pd.crosstab(
    ever_used["HEALTH_INSURANCE"],
    ever_used["OXYCONTIN_MISUSED"],
    margins=True,
    rownames=['Health Insurance (used prescription painkillers)'],
    colnames=['Misused OxyContin'],
    normalize='index',
)
# Show proportions as percentages rounded to two decimals.
(cs*100).round(decimals=2)

Misused Prescription Painkillers,Misused - past year,Did not misuse - past year
Health Insurance (used prescription painkillers),Unnamed: 1_level_1,Unnamed: 2_level_1
Has Medicaid/CHIP,2.07,97.93
Has Private HI,1.02,98.98
Has any other HI,0.9,99.1
Without any HI,3.23,96.77
All,98.6,1.4


In [21]:
# Restrict to respondents whose HEALTH_INSURANCE category is "Without any HI".
no_hi = df[df.HEALTH_INSURANCE == "Without any HI"]
# Preview only the first rows instead of rendering the whole filtered frame.
no_hi.head(10)

Unnamed: 0,SEX,AGE,PRES_PAINKILLER_USE,ANY_PAINKILLER_LIFETIME,OXYCONTIN_USE,OXYCONTIN_MISUSED,INCOME,FAMILY_INCOME,EDUCATION,CAIDCHIP_HI,PRIVATE_HI,ANY_HI,REC_PAINKILLER_USE,REC_PAINKILLER_MISUSE,HEALTH_INSURANCE
7,Female,22-23,Never used,Don't Know,Not used in past year,Did not misuse - past year,10k or less,30-40k,College Graduate,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Unknown,Never misused,Without any HI
8,Male,19,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,10k or less,10k or less,Less High School,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
11,Female,19,Never used,Not used,Not used in past year,Did not misuse - past year,10k or less,50-75k,Some Coll/Assoc Degree,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
15,Male,30-34,Never used,Not used,Not used in past year,Did not misuse - past year,75k+,75k+,Some Coll/Assoc Degree,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
33,Female,22-23,Used within year,Used (assumed),Not used in past year,Did not misuse - past year,10-20k,10-20k,High School Grad,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
74,Male,24-25,Used in 12+ months,Used,Not used in past year,Did not misuse - past year,10-20k,20-30k,High School Grad,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
76,Male,35-49,Used within year,Used (assumed),Not used in past year,Did not misuse - past year,10k or less,10-20k,Less High School,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
86,Female,35-49,Never used,Not used,Not used in past year,Did not misuse - past year,10k or less,10-20k,College Graduate,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI
96,Male,20,Used within year,Used (assumed),Used in past year,Did not misuse - past year,10k or less,20-30k,High School Grad,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Within more than 12 months,Misused before 12+ months,Without any HI
97,Female,35-49,Used within year,Used (assumed),Not used in past year,Did not misuse - past year,20-30k,40-50k,High School Grad,Doesn't have Medicaid/CHIP,Doesn't have Private HI,Without HI,Never,Never misused,Without any HI


In [24]:
# Recency of painkiller misuse within each recency-of-use category.
# normalize='index' makes every row sum to 1; margins=True appends "All".
# NOTE(review): in the recorded output the "All" row reads 89.46 under
# "Misused in last 30 days" and 1.4 under "Never misused"; this looks like
# the margin row is in reverse column order -- verify.
cs = pd.crosstab(
    index=df["REC_PAINKILLER_USE"],
    columns=df["REC_PAINKILLER_MISUSE"],
    rownames=['Use'],
    colnames=['Misuse'],
    margins=True,
    normalize='index',
)
# Show proportions as percentages rounded to two decimals.
cs.mul(100).round(2)

Misuse,Misused in last 30 days,Misused 31 days-12 months,Misused before 12+ months,Never misused
Use,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Within 30 days,100.0,0.0,0.0,0.0
31 days-12 months,0.0,100.0,0.0,0.0
Within more than 12 months,0.0,0.0,100.0,0.0
Within 12 months,25.0,75.0,0.0,0.0
Within lifetime,13.51,21.62,64.86,0.0
Within 30 days (log assn),100.0,0.0,0.0,0.0
Unknown,0.0,0.0,3.3,96.7
Never,0.0,0.0,0.0,100.0
Blank,0.93,2.8,2.34,93.93
All,89.46,5.7,3.44,1.4
