# Treatment Episode Data Set (TEDS)
[2019 Admissions data](https://www.datafiles.samhsa.gov/dataset/treatment-episode-data-set-admissions-2019-teds-2019-ds0001)
[2019 Discharge data](https://www.datafiles.samhsa.gov/dataset/teds-d-2019-ds0001-teds-d-2019-ds0001)

RESEARCH Q: “What predicts which individuals who report heroin(5)/non-prescription methadone(6)/other opiates and synthetics(7) as their primary substance use (“SUB1”) at admission in 2019 receive medication-assisted opioid therapy (“METHUSE”)?

In [1]:
import pandas as pd
import numpy as np

admissions = 'tedsa_puf_2019.csv'
discharges = 'tedsd_puf_2019.csv'
dfa = pd.read_csv(f'../../Downloads/{admissions}')
# dfd = pd.read_csv(f'../../Downloads/{discharges}')
# df2 = df.replace(-9, np.nan)

def breakdown(data, column):
    data_grouped = data[['CASEID', column]].groupby(column).count()
    data_grouped['PERCENT'] = round(100*data_grouped/len(data), 1)
    data_grouped = data_grouped.rename(columns={'CASEID': column})
    return data_grouped

In [9]:
dfa_freq = dfa.groupby('SUB1').count()/len(dfa)
dfa_freq.sort_values('CASEID', ascending=False)['CASEID']

SUB1
 2     0.307959
 5     0.235087
 10    0.112110
 4     0.112018
 7     0.066881
-9     0.058908
 3     0.057587
 1     0.017334
 13    0.009439
 19    0.007787
 11    0.005798
 8     0.002708
 12    0.001422
 6     0.001411
 9     0.001195
 16    0.001010
 15    0.000499
 17    0.000394
 18    0.000384
 14    0.000071
Name: CASEID, dtype: float64

In [63]:
print(len(dfa.columns))
dfa.columns

62


Index(['ADMYR', 'CASEID', 'STFIPS', 'CBSA2010', 'EDUC', 'MARSTAT', 'SERVICES',
       'DETCRIM', 'NOPRIOR', 'PSOURCE', 'ARRESTS', 'EMPLOY', 'METHUSE',
       'PSYPROB', 'PREG', 'GENDER', 'VET', 'LIVARAG', 'DAYWAIT', 'DSMCRIT',
       'AGE', 'RACE', 'ETHNIC', 'DETNLF', 'PRIMINC', 'SUB1', 'SUB2', 'SUB3',
       'ROUTE1', 'ROUTE2', 'ROUTE3', 'FREQ1', 'FREQ2', 'FREQ3', 'FRSTUSE1',
       'FRSTUSE2', 'FRSTUSE3', 'HLTHINS', 'PRIMPAY', 'FREQ_ATND_SELF_HELP',
       'ALCFLG', 'COKEFLG', 'MARFLG', 'HERFLG', 'METHFLG', 'OPSYNFLG',
       'PCPFLG', 'HALLFLG', 'MTHAMFLG', 'AMPHFLG', 'STIMFLG', 'BENZFLG',
       'TRNQFLG', 'BARBFLG', 'SEDHPFLG', 'INHFLG', 'OTCFLG', 'OTHERFLG',
       'DIVISION', 'REGION', 'IDU', 'ALCDRUG'],
      dtype='object')

In [65]:
dfo = dfa[dfa['SUB1'].isin([5, 6, 7])]
print(f'Patients with SUB1 = 5, 6, 7 represent {100*round(len(dfo)/len(dfa), 2)}% of the patient population.')
print(f'Patients with SUB1 = 5, 6, 7 represent {len(dfo)} patients.')

Patients with SUB1 = 5, 6, 7 represent 30.0% of the patient population.
Patients with SUB1 = 5, 6, 7 represent 565610 patients.


In [53]:
breakdown(dfo, 'SUB1')

Unnamed: 0_level_0,SUB1,PERCENT
SUB1,Unnamed: 1_level_1,Unnamed: 2_level_1
5,438288,77.5
6,2631,0.5
7,124691,22.0


In [54]:
breakdown(dfo, 'METHUSE')

Unnamed: 0_level_0,METHUSE,PERCENT
METHUSE,Unnamed: 1_level_1,Unnamed: 2_level_1
-9,41476,7.3
1,211743,37.4
2,312391,55.2


In [55]:
for i in range(5,8):
    print(f'\nFrequencies for SUB1 group {i}:')
    df_temp = dfo[dfo['SUB1']==i]
    print(breakdown(df_temp, 'METHUSE'))


Frequencies for SUB1 group 5:
         METHUSE  PERCENT
METHUSE                  
-9         28326      6.5
 1        164804     37.6
 2        245158     55.9

Frequencies for SUB1 group 6:
         METHUSE  PERCENT
METHUSE                  
-9           187      7.1
 1          1226     46.6
 2          1218     46.3

Frequencies for SUB1 group 7:
         METHUSE  PERCENT
METHUSE                  
-9         12963     10.4
 1         45713     36.7
 2         66015     52.9


In [69]:
# All of the columns are categorical (though many are numerical)
non_numeric_columns = ['ADMYR', 'CASEID', 'STFIPS', 'CBSA2010', 'EDUC', 'MARSTAT', 'SERVICES', 'DETCRIM', 'NOPRIOR', 'PSOURCE', 'ARRESTS', 'EMPLOY',
                       'METHUSE', 'PSYPROB', 'PREG', 'GENDER', 'VET', 'LIVARAG', 'DAYWAIT', 'DSMCRIT', 'AGE', 'RACE', 'ETHNIC', 'DETNLF', 'PRIMINC',
                       'SUB1', 'SUB2', 'SUB3', 'ROUTE1', 'ROUTE2', 'ROUTE3', 'FREQ1', 'FREQ2', 'FREQ3', 'FRSTUSE1', 'FRSTUSE2', 'FRSTUSE3', 'HLTHINS',
                       'PRIMPAY', 'FREQ_ATND_SELF_HELP', 'ALCFLG', 'COKEFLG', 'MARFLG', 'HERFLG', 'METHFLG', 'OPSYNFLG', 'PCPFLG', 'HALLFLG',
                       'MTHAMFLG', 'AMPHFLG', 'STIMFLG', 'BENZFLG', 'TRNQFLG', 'BARBFLG', 'SEDHPFLG', 'INHFLG', 'OTCFLG', 'OTHERFLG', 'DIVISION',
                       'REGION', 'IDU', 'ALCDRUG']

numeric_columns = []

In [70]:
# Separate into three groups: treated, nontreated, unknown
dfo_treated = dfo[dfo['METHUSE']==1]
dfo_nontreated = dfo[dfo['METHUSE']==2]
dfo_unknown = dfo[dfo['METHUSE']==-9]

In [71]:
dfo_treated[kept_columns].describe()

Unnamed: 0,ADMYR,CASEID,STFIPS,CBSA2010,EDUC,MARSTAT,SERVICES,DETCRIM,NOPRIOR,PSOURCE,...,TRNQFLG,BARBFLG,SEDHPFLG,INHFLG,OTCFLG,OTHERFLG,DIVISION,REGION,IDU,ALCDRUG
count,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,...,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0,211743.0
mean,2019.0,20190880000.0,25.917008,17400.437469,2.135816,-0.246823,6.349442,-8.204493,1.785381,1.886631,...,0.000269,0.000383,0.002536,0.000264,0.000746,0.01516,3.905055,2.108414,0.485825,2.094624
std,0.0,489919.1,12.029746,17266.006984,3.17832,4.126517,1.416943,3.059279,2.542953,2.519023,...,0.016405,0.019555,0.050296,0.01626,0.027306,0.122189,2.610139,1.163393,0.4998,0.292696
min,2019.0,20190000000.0,1.0,-9.0,-9.0,-9.0,1.0,-9.0,-9.0,-9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
25%,2019.0,20190500000.0,23.0,-9.0,2.0,1.0,7.0,-9.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,2.0
50%,2019.0,20190820000.0,26.0,14460.0,3.0,1.0,7.0,-9.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,0.0,2.0
75%,2019.0,20191270000.0,36.0,35620.0,3.0,2.0,7.0,-9.0,4.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,5.0,3.0,1.0,2.0
max,2019.0,20191860000.0,72.0,49660.0,5.0,4.0,8.0,8.0,5.0,7.0,...,1.0,1.0,1.0,1.0,1.0,1.0,9.0,4.0,1.0,3.0


In [67]:
dfo_nontreated[kept_columns].describe()

Unnamed: 0,ADMYR,CASEID,STFIPS,CBSA2010,EDUC,MARSTAT,SERVICES,DETCRIM,NOPRIOR,PSOURCE,...,TRNQFLG,BARBFLG,SEDHPFLG,INHFLG,OTCFLG,OTHERFLG,DIVISION,REGION,IDU,ALCDRUG
count,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,...,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0,312391.0
mean,2019.0,20190870000.0,26.611432,17090.752858,2.392847,-0.78016,4.734787,-7.222382,0.539846,2.636904,...,0.000387,0.000813,0.005371,0.00033,0.000755,0.018621,3.945645,2.185287,0.505732,2.148698
std,0.0,486678.5,13.484017,17355.826545,2.714528,4.489865,2.155244,4.475971,4.289367,2.984505,...,0.019677,0.028503,0.073093,0.018155,0.027475,0.135182,2.426313,1.083973,0.499968,0.355791
min,2019.0,20190000000.0,1.0,-9.0,-9.0,-9.0,1.0,-9.0,-9.0,-9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
25%,2019.0,20190470000.0,17.0,-9.0,2.0,1.0,2.0,-9.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,2.0
50%,2019.0,20190860000.0,25.0,14460.0,3.0,1.0,5.0,-9.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,1.0,2.0
75%,2019.0,20191250000.0,36.0,35620.0,3.0,1.0,7.0,-9.0,4.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,5.0,3.0,1.0,2.0
max,2019.0,20191860000.0,72.0,49700.0,5.0,4.0,8.0,8.0,5.0,7.0,...,1.0,1.0,1.0,1.0,1.0,1.0,9.0,4.0,1.0,3.0


In [None]:
dfo_unknown[kept_columns].describe()