Load data

In [62]:
import tensorflow as tf
import pandas as pd

# Load the dataset
cols = ['ID', 'Age', 'Gender', 'Education', 'Country', 'Ethnicity', 'N-Score', 'E-Score', 'O-Score', 'A-Score', 'C-Score', 'Impulsive', 'ImpSS', 'Alcohol', 'Amphet',
        'Amyl', 'Benzos', 'Caff', 'Cannabis', 'Choc', 'Coke', 'Crack', 'Ecstasy', 'Heroin', 'Ketamine', 'Legalh', 'LSD', 'Meth', 'Mushrooms', 'Nicotine', 'Semer', 'VSA']
df = pd.read_csv('./data/drug_consumption.data', names=cols)

Data Preprocessing

In [63]:
def convert_to_binary(cl_value):
    return 0 if cl_value in ['CL0', 'CL1', 'CL2'] else 1

for col in cols[13:]:
    df[col] = df[col].apply(convert_to_binary)


df.pop('ID')

x = df.iloc[:, :12]
drugs = cols[13:]

Model


In [64]:
from sklearn.metrics import classification_report
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

scaler = StandardScaler()

epochs = 500
batch_size = 8
dropout = .25

for drug in drugs:
    print(f"----- {drug} Classification Report -----")
    y = df[drug]

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.25, random_state=42)

    # Standardize features
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(256, input_shape=(x_train.shape[1],)),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Dropout(dropout),
        
        tf.keras.layers.Dense(128),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Dropout(dropout),
        
        tf.keras.layers.Dense(64),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Dropout(dropout),
        
        tf.keras.layers.Dense(32),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Dropout(dropout),
        
        tf.keras.layers.Dense(16),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Dropout(dropout),
        
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size,
              callbacks=tf.keras.callbacks.EarlyStopping(
                  monitor='val_loss', patience=10, verbose=1),
              validation_data=(x_test, y_test), verbose=0)

    # Predictions
    Y_pred = model.predict(x_test)
    Y_pred = np.where(Y_pred > 0.5, 1, 0)

    # Classification report
    print(classification_report(y_test, Y_pred,
          target_names=['Non-User', 'User'], zero_division=1))

----- Alcohol Classification Report -----
Epoch 26: early stopping
              precision    recall  f1-score   support

    Non-User       0.00      0.00      1.00        36
        User       0.92      1.00      0.96       436

    accuracy                           0.92       472
   macro avg       0.46      0.50      0.98       472
weighted avg       0.85      0.92      0.96       472

----- Amphet Classification Report -----
Epoch 16: early stopping
              precision    recall  f1-score   support

    Non-User       0.84      0.88      0.86       366
        User       0.51      0.42      0.46       106

    accuracy                           0.78       472
   macro avg       0.67      0.65      0.66       472
weighted avg       0.77      0.78      0.77       472

----- Amyl Classification Report -----
Epoch 12: early stopping
              precision    recall  f1-score   support

    Non-User       0.93      1.00      0.96       437
        User       1.00      0.00      0