# Treatment Episode Data Set (TEDS)
[2019 Admissions data](https://www.datafiles.samhsa.gov/dataset/treatment-episode-data-set-admissions-2019-teds-2019-ds0001)
[2019 Discharge data](https://www.datafiles.samhsa.gov/dataset/teds-d-2019-ds0001-teds-d-2019-ds0001)

In [1]:
import pandas as pd
import numpy as np

# Load in data
admissions = 'tedsa_puf_2019.csv'
discharges = 'tedsd_puf_2019.csv'
df_admissions = pd.read_csv(f'../../Downloads/{admissions}')
# dfd = pd.read_csv(f'../../Downloads/{discharges}')
# df_admissions = df_admissions.replace(-9, np.nan)

# Helper functions
def breakdown(data, column):
    data_grouped = data[['CASEID', column]].groupby(column).count()
    data_grouped['PERCENT'] = round(100*data_grouped/len(data), 1)
    data_grouped = data_grouped.rename(columns={'CASEID': 'COUNT'})
    return data_grouped

In [2]:
breakdown(df_admissions, 'SUB1')

Unnamed: 0_level_0,COUNT,PERCENT
SUB1,Unnamed: 1_level_1,Unnamed: 2_level_1
-9,109826,5.9
1,32317,1.7
2,574148,30.8
3,107363,5.8
4,208843,11.2
5,438288,23.5
6,2631,0.1
7,124691,6.7
8,5049,0.3
9,2227,0.1


In [3]:
print(f'There are {len(df_admissions.columns)} columns')

There are 62 columns


In [4]:
df_opioid_users = df_admissions[df_admissions['SUB1'].isin([5, 6, 7])]
print(f'Patients with SUB1 = 5, 6, 7 represent {100*round(len(df_opioid_users)/len(df_admissions), 2)}% of the patient population.')
print(f'Patients with SUB1 = 5, 6, 7 represent {len(df_opioid_users)} patients.')

Patients with SUB1 = 5, 6, 7 represent 30.0% of the patient population.
Patients with SUB1 = 5, 6, 7 represent 565610 patients.


In [5]:
breakdown(df_opioid_users, 'SUB1')

Unnamed: 0_level_0,COUNT,PERCENT
SUB1,Unnamed: 1_level_1,Unnamed: 2_level_1
5,438288,77.5
6,2631,0.5
7,124691,22.0


In [6]:
breakdown(df_opioid_users, 'METHUSE')

Unnamed: 0_level_0,COUNT,PERCENT
METHUSE,Unnamed: 1_level_1,Unnamed: 2_level_1
-9,41476,7.3
1,211743,37.4
2,312391,55.2


In [7]:
# Breakdown for group 5, 6, 7
for i in range(5,8):
    print(f'\nFrequencies for SUB1 group {i}:')
    df_temp = df_opioid_users[df_opioid_users['SUB1']==i]
    print(breakdown(df_temp, 'METHUSE'))


Frequencies for SUB1 group 5:
          COUNT  PERCENT
METHUSE                 
-9        28326      6.5
 1       164804     37.6
 2       245158     55.9

Frequencies for SUB1 group 6:
         COUNT  PERCENT
METHUSE                
-9         187      7.1
 1        1226     46.6
 2        1218     46.3

Frequencies for SUB1 group 7:
         COUNT  PERCENT
METHUSE                
-9       12963     10.4
 1       45713     36.7
 2       66015     52.9


In [8]:
# All the columns are categorical (though many are numerical)
non_numeric_columns = ['ADMYR', 'CASEID', 'STFIPS', 'CBSA2010', 'EDUC', 'MARSTAT', 'SERVICES', 'DETCRIM', 'NOPRIOR', 'PSOURCE', 'ARRESTS', 'EMPLOY',
                       'METHUSE', 'PSYPROB', 'PREG', 'GENDER', 'VET', 'LIVARAG', 'DAYWAIT', 'DSMCRIT', 'AGE', 'RACE', 'ETHNIC', 'DETNLF', 'PRIMINC',
                       'SUB1', 'SUB2', 'SUB3', 'ROUTE1', 'ROUTE2', 'ROUTE3', 'FREQ1', 'FREQ2', 'FREQ3', 'FRSTUSE1', 'FRSTUSE2', 'FRSTUSE3', 'HLTHINS',
                       'PRIMPAY', 'FREQ_ATND_SELF_HELP', 'ALCFLG', 'COKEFLG', 'MARFLG', 'HERFLG', 'METHFLG', 'OPSYNFLG', 'PCPFLG', 'HALLFLG',
                       'MTHAMFLG', 'AMPHFLG', 'STIMFLG', 'BENZFLG', 'TRNQFLG', 'BARBFLG', 'SEDHPFLG', 'INHFLG', 'OTCFLG', 'OTHERFLG', 'DIVISION',
                       'REGION', 'IDU', 'ALCDRUG']

numeric_columns = []

In [9]:
# Separate into three groups: treated, nontreated, unknown
dfo_treated = df_opioid_users[df_opioid_users['METHUSE']==1]
df_nontreated = df_opioid_users[df_opioid_users['METHUSE']==2]
dfo_unknown = df_opioid_users[df_opioid_users['METHUSE']==-9]