In [1]:
import pandas as pd
# Load the dataset from 'de.csv' (path is relative to the working directory) into a DataFrame.
drugs_df = pd.read_csv('de.csv')

In [12]:
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

In [2]:
def misuse_cat(df, n_drugs=14):
    """Add an integer ``<DRUG>_MISUSE`` column for each drug found in ``df``.

    Drug prefixes are taken from the first ``n_drugs`` columns whose name
    contains ``'NMU'``; the prefix is the text before the first underscore
    (e.g. ``'FENT_NMU'`` -> ``'FENT'``). For every prefix, the matching
    ``<DRUG>_NMU`` column is copied with missing values replaced by 0 and
    cast to ``int``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; a copy is returned.
    n_drugs : int, default 14
        How many of the leading ``NMU`` columns to treat as per-drug columns.

    Returns
    -------
    tuple[pandas.DataFrame, list[str]]
        The copied frame with the new ``*_MISUSE`` columns, and the list of
        drug prefixes that were processed.

    Notes
    -----
    NOTE(review): an earlier comment described the resulting coding as
    0 = never used, 1 = used for prescription purposes, 2 = used for
    recreational purposes. The code only copies the ``*_NMU`` values
    (NaN -> 0), so confirm that coding against the data dictionary.
    """
    # Copy the frame that was actually passed in (not a notebook global), so the
    # caller's data is left untouched and the function works on any input frame.
    new_df = df.copy()
    # get list of drug names
    nmu_columns = [col for col in df.columns if 'NMU' in col][:n_drugs]
    drugs = [col.split('_')[0] for col in nmu_columns]
    print(drugs)
    # make categorical variable for each drug
    for drug in drugs:
        new_df[f'{drug}_MISUSE'] = new_df[f'{drug}_NMU'].fillna(0).astype(int)
    return new_df, drugs

In [3]:
# Rebind drugs_df to the copy that carries the new <DRUG>_MISUSE columns, and keep
# the list of drug prefixes in the notebook-level name `drugs` for later cells.
drugs_df, drugs = misuse_cat(drugs_df)

['FENT', 'BUP', 'METH', 'MORPH', 'OXY', 'TRAM', 'TAP', 'SUF', 'COD', 'DIHY', 'HYDM', 'STIM', 'BENZ', 'THC']


In [4]:
def misuse_total(df, drug_names=None):
    """Return a copy of ``df`` with a ``MIS_TOTAL`` column.

    ``MIS_TOTAL`` is the row-wise sum of the ``<DRUG>_MISUSE`` columns for the
    given drug prefixes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that already contains the ``<DRUG>_MISUSE`` columns. It is not
        modified; a copy is returned.
    drug_names : list[str] or None, default None
        Drug prefixes to sum over. When omitted, the notebook-level ``drugs``
        list (as returned by ``misuse_cat``) is used, which keeps existing
        one-argument calls working.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the added ``MIS_TOTAL`` column.
    """
    if drug_names is None:
        # Backward-compatible default: fall back to the notebook-level list.
        drug_names = drugs
    # Copy the argument (not a notebook global) so the function honours its input.
    new_df = df.copy()
    new_df['MIS_TOTAL'] = 0
    for drug in drug_names:
        new_df['MIS_TOTAL'] = new_df['MIS_TOTAL'] + new_df[f'{drug}_MISUSE']
    return new_df

In [5]:
# Rebind drugs_df to the copy that carries the MIS_TOTAL column
# (sum of the per-drug <DRUG>_MISUSE columns).
drugs_df = misuse_total(drugs_df)

In [6]:
# Binary misuse flag: 1 where MIS_TOTAL is positive, 0 otherwise
drugs_df['MIS_BIN'] = drugs_df['MIS_TOTAL'].gt(0).astype(int)

In [7]:
def got_pres_for_pain(df):
    """Flag rows that have a value in either pain-prescription column.

    A row is flagged when ``PAIN_CHRONIC_RX`` or ``PAIN_ACUTE_RX`` is not
    missing (any non-NaN value counts, whatever it is).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``PAIN_CHRONIC_RX`` and ``PAIN_ACUTE_RX`` columns.

    Returns
    -------
    list[bool]
        One entry per row of ``df``, in row order.
    """
    # Read from the argument (not a notebook global) and work column-wise, so the
    # result does not depend on the frame having a default 0..n-1 index.
    has_chronic_rx = df['PAIN_CHRONIC_RX'].notna()
    has_acute_rx = df['PAIN_ACUTE_RX'].notna()
    return (has_chronic_rx | has_acute_rx).tolist()

In [8]:
def get_illicit_use(df):
    """Flag rows that report use of at least one of the listed drugs.

    A row gets 1 when any of the ``*_USE`` columns listed below equals 1, and
    0 otherwise (including when every listed column is 0 or missing).

    The previous implementation tested ``!= 1`` and therefore flagged a row as
    soon as any single listed drug had NOT been used, which marks almost every
    row as a user.

    NOTE(review): assumes a value of 1 encodes "has used" in these columns;
    confirm against the data dictionary.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing all of the ``*_USE`` columns listed below.

    Returns
    -------
    list[int]
        One 0/1 entry per row of ``df``, in row order.
    """
    # list all illicit drugs
    illicit = ['CAN_USE','COKE_USE','CRACK_USE','MDMA_USE','SPEED_USE','GHB_USE','HEROIN_USE','KET_USE','LEGAL_USE','LSD_USE','MUSH_USE','STER_USE','NPFENT_USE','INH_USE','MEPH_USE']
    # Column-wise comparison: NaN never equals 1, and no assumption is made about
    # the frame's index labels.
    used_any = df[illicit].eq(1).any(axis=1)
    return used_any.astype(int).tolist()

In [9]:
def get_mental_condition(df):
    """Derive a 0/1 flag from the ``MENT_NONE`` column.

    A row gets 0 when ``MENT_NONE`` equals 1 and 1 for any other value,
    including a missing value.

    NOTE(review): presumably ``MENT_NONE == 1`` means the respondent reported
    no mental condition; confirm against the data dictionary.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``MENT_NONE`` column.

    Returns
    -------
    list[int]
        One 0/1 entry per row of ``df``, in row order.
    """
    # Vectorised comparison: NaN != 1 evaluates to True (flag 1), and the result
    # does not depend on the frame having a default 0..n-1 index.
    return df['MENT_NONE'].ne(1).astype(int).tolist()

In [10]:
# Attach the three derived indicator columns, in this order, each computed from
# the current state of drugs_df.
for _column, _builder in (
    ('ILL_USE', get_illicit_use),
    ('MENTAL', get_mental_condition),
    ('PAIN_BIN', got_pres_for_pain),
):
    drugs_df[_column] = _builder(drugs_df)

In [35]:
# Candidate feature names: every column except the two target columns and the two
# date columns. A missing name raises ValueError, exactly as list.remove would.
# NOTE(review): the <DRUG>_MISUSE columns that MIS_TOTAL / MIS_BIN are summed from
# stay in this list -- confirm that is intended.
remove_list = ['MIS_BIN', 'MIS_TOTAL', 'DATE', 'START_DATE']
X_var_list = list(drugs_df.columns)
for name in remove_list:
    del X_var_list[X_var_list.index(name)]

In [36]:
# Modelling frame: a copy of drugs_df with every remaining NaN replaced by 0.
df = drugs_df.fillna(0)

In [37]:
# Score each candidate feature on its own (paired with 'STATUS'): fit a decision
# tree on a 60/40 train/test split and record the test accuracy per feature.
# The estimator is seeded so that repeated runs give the same accuracies; an
# unseeded tree permutes features randomly at each split.
clf = tree.DecisionTreeClassifier(random_state=42)
tree_table = {}

# The target does not change between iterations, so look it up once.
y = df['MIS_BIN']

for X_variable in X_var_list:
    # De-duplicate so that the 'STATUS' iteration does not pass the same column twice.
    X_list = list(dict.fromkeys([X_variable, 'STATUS']))
    X = df[X_list]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4, random_state=42)

    # fit() discards any previously learned tree, so reusing one estimator is safe.
    clf = clf.fit(X_train, y_train)

    X_test_predict = clf.predict(X_test)
    # Fraction of test rows predicted correctly.
    accuracy = (y_test == X_test_predict).mean()
    tree_table[X_variable] = accuracy

In [40]:
# Keep only the features whose single-feature tree reached at least 0.8 test accuracy
high_tree = {
    feature: score
    for feature, score in tree_table.items()
    if score >= 0.8
}

In [41]:
# Display the feature -> test-accuracy mapping for the features that passed the 0.8 cut-off.
high_tree

{'FENT_NMU': 0.8119913635608703,
 'TRAM_NMU': 0.8030227536953994,
 'COD_NMU': 0.9302441454907823,
 'BENZ_NMU': 0.8026905829596412,
 'OP_NMU_EVER': 0.9659524995847866,
 'BENZ_NMU_EVER': 0.8026905829596412,
 'OP_NMU_YR': 0.8868958644743398,
 'OP_NMU_NTY': 0.8388971931572828,
 'OP_NMU_MNTH': 0.822454741737253,
 'OP_NMU_WK': 0.8099983391463212,
 'COD_NMU_NTY': 0.8216243148978575,
 'FENT_MISUSE': 0.8119913635608703,
 'TRAM_MISUSE': 0.8030227536953994,
 'COD_MISUSE': 0.9302441454907823,
 'BENZ_MISUSE': 0.8026905829596412}